def main():
    print "Looking for problematic inbox elements..."
    problemRequests = getProblematicRequests()
    print "Found %d bad elements:" % len(problemRequests)
    if not problemRequests:
        print "Nothing to fix, contact a developer if the problem persists..."
        return 0
    for request in problemRequests:
        print request["RequestName"]
    var = raw_input("Can we close these for new data in inbox elements: Y/N\n")
    if var == "Y":
        print "Updating them in global inbox, you need a WMAgent proxy for this."
        inboxDB = Database('workqueue_inbox', 'https://cmsweb.cern.ch/couchdb')
        for request in problemRequests:
            # fetch the document first (return value unused)
            inboxDB.document(request["_id"])
            inboxDB.updateDocument(request["_id"],
                                   'WorkQueue',
                                   'in-place',
                                   fields={'OpenForNewData': False})
        print "Done with the updates, this should fix the problem."
        return 0
    else:
        var = raw_input("Then can we delete these inbox elements: Y/N\n")
        if var == "Y":
            print "Deleting them from the global inbox, you need a WMAgent proxy for this."
            inboxDB = Database('workqueue_inbox',
                               'https://cmsweb.cern.ch/couchdb')
            for request in problemRequests:
                inboxDB.delete_doc(request["_id"], request["_rev"])
            print "Done with the deletions, this should fix the problem."
            return 0
        else:
            print "Doing nothing as you commanded..."
        return 0
Example #3
def requestDetails(requestName):
    """ Adds details from the Couch document as well as the database """
    WMCore.Lexicon.identifier(requestName)
    request = GetRequest.getRequestDetails(requestName)
    helper = loadWorkload(request)
    schema = helper.data.request.schema.dictionary_whole_tree_()
    # take the stuff from the DB preferentially
    schema.update(request)
    task = helper.getTopLevelTask()[0]
    
    schema['Site Whitelist']  = task.siteWhitelist()
    schema['Site Blacklist']  = task.siteBlacklist()
    schema['MergedLFNBase']   = str(helper.getMergedLFNBase())
    schema['UnmergedLFNBase'] = str(helper.getUnmergedLFNBase())
    schema['Campaign']        = str(helper.getCampaign()) 
    schema['AcquisitionEra']  = str(helper.getAcquisitionEra())
    if schema['SoftwareVersions'] == ['DEPRECATED']:
        schema['SoftwareVersions'] = helper.getCMSSWVersions()

    # Check in the CouchWorkloadDBName if not present
    schema.setdefault("CouchWorkloadDBName", "reqmgr_workload_cache")

    # get DbsUrl from CouchDB
    if schema.get("CouchWorkloadDBName", None) and schema.get("CouchURL", None):
        couchDb = Database(schema["CouchWorkloadDBName"], schema["CouchURL"])
        couchReq = couchDb.document(requestName)
        schema["DbsUrl"] = couchReq.get("DbsUrl", None)
        
    # https://github.com/dmwm/WMCore/issues/4588
    schema["SubscriptionInformation"] = helper.getSubscriptionInformation()
    return schema
def main():
    config = loadConfigurationFile(os.environ['WMAGENT_CONFIG'])
    config.CoreDatabase.dialect = 'oracle'
    init = WMInit()
    init.setDatabaseConnection(config.CoreDatabase.connectUrl,
                               config.CoreDatabase.dialect)
    couchDB = Database('wmagent_jobdump/fwjrs', '')
    couchDB2 = Database('wmagent_jobdump/jobs', '')
    myThread = threading.currentThread()
    daofactory = DAOFactory(package = "WMCore.WMBS",
                            logger = logging,
                            dbinterface = myThread.dbi)
    getJobsDAO = daofactory(classname = "Jobs.GetAllJobs")
    completedJobs = getJobsDAO.execute(state = 'complete')
    candidates = []
    while len(completedJobs):
        candidates = []
        chunk = completedJobs[:500]
        completedJobs = completedJobs[500:]
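        # work through the completed jobs in slices of 500 so each couch view query
        # stays reasonably small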
        result = couchDB.loadView('FWJRDump', 'outputByJobID', keys = chunk)
        rows = result['rows']
        for entry in rows:
            candidates.append(entry['key'])
        for jobId in candidates:
            doc = couchDB2.document(str(jobId))
            last = max(map(int, doc['states'].keys()))
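            # 'states' maps the transition index to a state record, e.g. (hypothetical)
            # {'0': {'newstate': 'created', ...}, '1': {'newstate': 'success', ...}};
            # the highest index is therefore the most recent transition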
            lastState = doc['states'][str(last)]['newstate']
            if lastState == 'success':
                print jobId
def checkForMissingFiles(options):
    #Initialize stuff
    phedexAPI = PhEDEx({'cachepath' : options.cachepath})
    acdcCouch = Database('wmagent_acdc', options.acdcUrl)

    #Let's get the IDs of the ACDC documents for the task/request/group/user
    array = [options.group, options.user, options.request, options.task]
    result = acdcCouch.loadView('ACDC', 'owner_coll_fileset_docs', {'reduce' : False}, [array])

    documentsIDs = [x['id'] for x in result['rows']]
    
    badFiles = {}

    #Go through the documents
    for docID in documentsIDs:
        doc = acdcCouch.document(docID)

        #Are we going to change this doc? Better back it up
        if options.change:
            backupFile = open(os.path.join(options.backup, "%s.bkp" % doc["_id"]), 'w')
            json.dump(doc, backupFile)
            backupFile.close()

        #Go through the files
        files = doc["files"]
        for inputFile in files:

            #Use PhEDEx API to get site based on the SE
            se = files[inputFile]["locations"][0]
            siteLocation = phedexAPI.getBestNodeName(se)

            #Now get the PFN
            pfnDict = phedexAPI.getPFN(siteLocation, inputFile)
            inputPfn = pfnDict[(siteLocation, inputFile)]

            #Run lcg-ls commands and see what we get
            command = 'lcg-ls -b -D srmv2 --srm-timeout 60 %s' % inputPfn
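            # e.g. (hypothetical PFN):
            #   lcg-ls -b -D srmv2 --srm-timeout 60 srm://srm.example.ch:8443/srm/managerv2?SFN=/store/data/.../file.root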
            
            commandList = shlex.split(command)
            try:
                (stdout, stderr, exitCode) = runCommand(commandList, False, 70)
            except Exception, ex:
                exitCode = 99999
                stdout = ''
                stderr = str(ex)
            
            if exitCode:
                #Something went wrong with the command
                #Mark the file as bad
                if docID not in badFiles:
                    badFiles[docID] = []
                badFiles[docID].append(inputFile)
                print 'File %s is thought to be bad' % inputFile
                print 'Command was %s' % command
                print 'Return code was %i' % exitCode
                print 'Stdout was %s' % stdout
                print 'Stderr was %s' % stderr
def main():
    reader = WMStatsReader("http://dummy.cern.ch:5984", "wmagent_summary")
    wmstats = Database('wmagent_summary', 'http://dummy.cern.ch:5984')
    suspiciousWorkflows = reader.workflowsByStatus(["Processing Done"], stale = False)
    for entry in suspiciousWorkflows:
        requestDoc = wmstats.document(entry)
        statusList = requestDoc['request_status']
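        # e.g. (hypothetical history): if the status list ends with
        # [..., {'status': 'normal-archived'}, {'status': 'Processing Done'}],
        # dropping the trailing entry undoes the spurious late transition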
        if statusList[-2]['status'] == 'normal-archived':
            statusList = statusList[:-1]
            requestDoc['request_status'] = statusList
            wmstats.queue(requestDoc)
            
    wmstats.commit()
def main():
    if len(sys.argv) < 2:
        print("Takes 1 input argument - dump of Oracle reqmgr_request "
              "table in a Python dictionary.")
        sys.exit(1)

    print("Creating database connection ...")
    # couch_server = CouchServer(couch_url)
    db = Database(couch_db_name, couch_url)
    execfile(sys.argv[1], globals())
    oracle_requests = reqmgr_request  # read from the input file

    print("Oracle requests: %s" % len(oracle_requests))

    print("Retrieving data from CouchDB ...")
    couch_requests = db.allDocs()
    couch_request_names = []
    for row in couch_requests["rows"]:
        if row["id"].startswith("_design"): continue
        couch_request_names.append(row["id"])
    print("CouchDB requests: %s" % len(couch_request_names))

    print("Comparing Oracle and CouchDB requests ...")
    not_present_in_couch = []
    for request in oracle_requests:
        oracle_request_name = request["REQUEST_NAME"]
        # remove first occurrence of value. Raises ValueError if not present
        try:
            couch_request_names.remove(oracle_request_name)
        except ValueError:
            not_present_in_couch.append(oracle_request_name)

    print("CouchDB requests not present in Oracle:")
    print("%s requests" % len(couch_request_names))
    for name in couch_request_names:
        request = db.document(name)
        if name != request["RequestName"] or name != request["_id"]:
            print(
                "\t Mismatch: CouchDB id: '%s' RequestName: '%s' name: '%s'" %
                (request["_id"], request["RequestName"], name))
        print("%s  %s  %s" % (request["RequestName"], request["RequestType"],
                              request["RequestStatus"]))
    print("\n\n")
    print("Oracle requests not present in CouchDB:")
    print("%s requests" % len(not_present_in_couch))
    for name in not_present_in_couch:
        print(name)
def main():
    if len(sys.argv) < 2:
        print ("Takes 1 input argument - dump of Oracle reqmgr_request "
               "table in a Python dictionary.")
        sys.exit(1)

    print "Creating database connection ..."
    # couch_server = CouchServer(couch_url)
    db = Database(couch_db_name, couch_url)
    execfile(sys.argv[1], globals())
    oracle_requests = reqmgr_request # read from the input file
    
    print "Oracle requests: %s" % len(oracle_requests)

    print "Retrieving data from CouchDB ..."
    couch_requests = db.allDocs()
    couch_request_names = []
    for row in couch_requests["rows"]:
        if row["id"].startswith("_design"): continue
        couch_request_names.append(row["id"])
    print "CouchDB requests: %s" % len(couch_request_names)

    print "Comparing Oracle and CouchDB requests ..."
    not_present_in_couch = []
    for request in oracle_requests:
        oracle_request_name = request["REQUEST_NAME"]
        # remove first occurrence of value. Raises ValueError if not present
        try:
            couch_request_names.remove(oracle_request_name)
        except ValueError:
            not_present_in_couch.append(oracle_request_name)


    print "CouchDB requests not present in Oracle:"
    print "%s requests" % len(couch_request_names)
    for name in couch_request_names:
        request = db.document(name)
        if name != request["RequestName"] or name != request["_id"]:
            print ("\t Mismatch: CouchDB id: '%s' RequestName: '%s' name: '%s'" %
                   (request["_id"], request["RequestName"], name))
        print "%s  %s  %s" % (request["RequestName"], request["RequestType"],
                request["RequestStatus"])
    print "\n\n"
    print "Oracle requests not present in CouchDB:"
    print "%s requests" % len(not_present_in_couch)
    for name in not_present_in_couch:
        print name
def findParentJobs(jobId):
    # Connect to the Job and FWJR DBs
    jobDB = Database('wmagent_jobdump/jobs', 'http://dummy.cern.ch:5984')
    fwjrDB = Database('wmagent_jobdump/fwjrs', 'http://dummy.cern.ch:5984')

    # Get the document of the child job
    childJobDoc = jobDB.document(id = jobId)

    # Get the workflow and input files, transform them into suitable keys [workflow, lfn]
    workflow = childJobDoc['workflow']
    inputLfns = [x['lfn'] for x in childJobDoc['inputfiles']]
    keys = [[workflow, x] for x in inputLfns]
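    # 'keys' is a list of composite [workflow, lfn] pairs, e.g. (hypothetical values)
    # [['pdmvserv_task_HIG_X_v1', '/store/unmerged/.../file.root']], matching the
    # composite keys emitted by the 'jobsByOutputLFN' view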

    # Get the jobs that produced the input files for this job
    # Load the id and fwjr for these jobs since we have to re-run them
    result = fwjrDB.loadView('FWJRDump', 'jobsByOutputLFN', {}, keys)
    for entry in result['rows']:
        key = entry['key']
        jobId = entry['value']
        fwjrId = entry['id']
        result = fwjrDB.loadView('FWJRDump', 'logArchivesByJobID', {}, [[int(x) for x in fwjrId.split('-')]])
        logArch = result['rows'][0]['value']['lfn']

        # Check whether the logArch is in some LogCollect
        logCollectTarball = ''
        result = jobDB.loadView('JobDump', 'jobsByInputLFN', {}, [[workflow, logArch]])
        if result['rows']:
            logCollectJobId = result['rows'][0]['id']
            result = fwjrDB.loadView('FWJRDump', 'outputByJobID', {}, [int(logCollectJobId)])
            if result['rows']:
                logCollectTarball = result['rows'][0]['value']['lfn']
            else:
                print "WARNING: The logArchive for job %s was in a LogCollect job but no tarball was produced" % jobId

        # Print out the information
        print "Job %s produced %s, the logArch for it is %s in %s" % (jobId, key[1], logArch, logCollectTarball)

    return
Example #10
def update_software(config_file):
    """
    Retrieves the CMSSW versions and ScramArchs from the CMS tag collector.

    """
    config = loadConfigurationFile(config_file)
    # source of the data
    tag_collector_url = config.views.data.tag_collector_url
    # store the data into CouchDB auxiliary database under "software" document
    couch_host = config.views.data.couch_host
    reqmgr_aux_db = config.views.data.couch_reqmgr_aux_db

    # get data from tag collector
    all_archs_and_versions = _get_all_scramarchs_and_versions(
        tag_collector_url)
    if not all_archs_and_versions:
        return

    # get data already stored in CouchDB
    couchdb = Database(dbname=reqmgr_aux_db, url=couch_host)
    try:
        sw_already_stored = couchdb.document("software")
        del sw_already_stored["_id"]
        del sw_already_stored["_rev"]
    except CouchNotFoundError:
        logging.error("Document id 'software' does not exist, creating it ...")
        doc = Document(id="software", inputDict=all_archs_and_versions)
        couchdb.commitOne(doc)
        return

    # now compare recent data from tag collector and what we already have stored
    # sorting is necessary
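    # note: sorted() on a dict yields its sorted keys, so this compares the two
    # sorted key lists rather than the nested values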
    if sorted(all_archs_and_versions) != sorted(sw_already_stored):
        logging.debug(
            "ScramArch/CMSSW releases changed, updating software document ...")
        doc = Document(id="software", inputDict=all_archs_and_versions)
        couchdb.commitOne(doc)
def dump(full_dump=False, fields=None):
    print("Querying fields: %s\n\n" % fields)
    db = Database(couch_db_name, couch_url)
    couch_requests = db.allDocs()
    doc_counter = 0
    for row in couch_requests["rows"]:
        if row["id"].startswith("_design"): continue
        doc = db.document(row["id"])
        if fields:
            s = ''
            for f in fields:
                try:
                    s += "%s:%s  " % (f, doc[f])
                except KeyError:
                    s += "%s:n/a  " % f 
            print("%s  %s\n" % (s, doc["RequestName"]))
        elif full_dump:
            print("%s\n%s\n%s\n" % (row["id"], doc, 70*'-'))
        else:
            print(row["id"])
        doc_counter += 1
        #if doc_counter > 100:
        #    break
    print("Total documents: %s" % doc_counter) 
Example #12
def update_software(config_file):
    """
    Retrieves the CMSSW versions and ScramArchs from the CMS tag collector.
    
    """
    config = loadConfigurationFile(config_file)
    # source of the data
    tag_collector_url = config.views.data.tag_collector_url
    # store the data into CouchDB auxiliary database under "software" document
    couch_host = config.views.data.couch_host
    reqmgr_aux_db = config.views.data.couch_reqmgr_aux_db
    
    # get data from tag collector
    all_archs_and_versions = _get_all_scramarchs_and_versions(tag_collector_url)
    if not all_archs_and_versions:
        return
    
    # get data already stored in CouchDB    
    couchdb = Database(dbname=reqmgr_aux_db, url=couch_host)
    try:
        sw_already_stored = couchdb.document("software")
        del sw_already_stored["_id"]
        del sw_already_stored["_rev"]
    except CouchNotFoundError:
        logging.error("Document id 'software' does not exist, creating it ...")
        doc = Document(id="software", inputDict=all_archs_and_versions)
        couchdb.commitOne(doc)
        return
    
    # now compare recent data from tag collector and what we already have stored
    # sorting is necessary
    if sorted(all_archs_and_versions) != sorted(sw_already_stored):
        logging.debug("ScramArch/CMSSW releases changed, updating software document ...")
        doc = Document(id="software", inputDict=all_archs_and_versions)
        couchdb.commitOne(doc)
Example #13
def dump(full_dump=False, fields=None):
    print "Querying fields: %s\n\n" % fields
    db = Database(couch_db_name, couch_url)
    couch_requests = db.allDocs()
    doc_counter = 0
    for row in couch_requests["rows"]:
        if row["id"].startswith("_design"): continue
        doc = db.document(row["id"])
        if fields:
            s = ''
            for f in fields:
                try:
                    s += "%s:%s  " % (f, doc[f])
                except KeyError:
                    s += "%s:n/a  " % f
            print "%s  %s\n" % (s, doc["RequestName"])
        elif full_dump:
            print "%s\n%s\n%s\n" % (row["id"], doc, 70 * '-')
        else:
            print row["id"]
        doc_counter += 1
        #if doc_counter > 100:
        #    break
    print "Total documents: %s" % doc_counter
Example #14
class OpsClipboardTest(unittest.TestCase):
    def setUp(self):
        # For experiments with CouchDB content it's useful to let the docs
        # remain in the database by commenting out the tearDownCouch statement.
        # If the database exists at this point, tearDownCouch was probably
        # commented out, so do not drop the database
        #self.testInit = TestInitCouchApp(__file__, dropExistingDb=False)
        self.testInit = TestInitCouchApp(__file__, dropExistingDb=True)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        dbName = "opsclipboard_t"
        self.testInit.setupCouch(dbName, "OpsClipboard")
        # the tests use httplib2 for accessing the OpsClipboard couchapp to
        # emulate web browser access rather than direct REST access;
        # the couch attribute is only used to verify written/modified data
        #couchServer = CouchServer(os.environ["COUCHURL"])
        #self.configDatabase = couchServer.connectDatabase(dbName)
        # used to verify written / modified data in CouchDB
        self.couch = Database(dbName, self.testInit.couchUrl)
        
            
    def tearDown(self):
        # comment out to see stuff remaining in the database
        self.testInit.tearDownCouch() # self.testInit.couch gets None-ed here
        #pass

        
    def _inject(self, numRequests):
        # This creates numRequests documents using the test data above, representing
        # requests belonging to two campaigns that have just been placed
        # into the "ops-hold" state in the ReqMgr.
        # Whenever a request enters the "ops-hold" state, the dict containing the
        # request params should be added to the OpsClipboard using the
        # inject API call (see Assign.py):
        requests, campaignIds, requestIds = getTestRequests(numRequests)
        OpsClipboard.inject(self.testInit.couchUrl, self.testInit.couchDbName, *requests)
        return requests, campaignIds, requestIds
    
    
    def _getViewResults(self, viewName, options = {}):
        """
        Query CouchDB viewName, return rows.
        
        """
        try:
            result = self.couch.loadView("OpsClipboard", viewName, options)
        except Exception as ex:
            msg = "Error loading OpsClipboard view: '%s', reason:%s\n" % (viewName, ex)
            self.fail(msg)
        return result[u"rows"]
    
        
    def testA_view_all(self):
        """
        Testing the 'all' view.
        
        """
        self._inject(10) # creates 10 documents
        # Now read back data for the test requests and verify
        # that we have 10 requests in the OpsClipboard
        # all view returns all requests in the OpsClipboard
        allRequests = self._getViewResults("all")   
        self.assertEqual(len(allRequests), 10) # expected 10 requests
        for request in allRequests:
            self.failUnless(request[u"key"] == u"NewlyHeld")

        
    def testB_view_campaign(self):
        """
        Testing the 'campaign' view.
        Returns requests with campaign_id as keys.
        
        """
        _, campaignIds, requestIds = self._inject(7) # creates x docs/requests
        campView = self._getViewResults("campaign")
        self.assertEqual(len(campView), 7)
        for c in campView:
            self.failUnless(c[u"key"] in campaignIds)
            self.failUnless(c[u"value"][u"request_id"] in requestIds)
            # check that result ('value') dictionary has all these keys     
            map(self.failUnless, [c[u"value"].has_key(key) for key in ("doc_id", "state", "updated")])
                        
            
    def testC_view_campaign_ids(self):
        """
        Testing the 'campaign_ids' view.
        Returns a list of campaign names (campaign_ids) with duplicates removed.
        
        """
        _, campaignIds, _ = self._inject(8) # creates x docs/requests
        campList = self._getViewResults("campaign_ids", options = {"group": True})
        expected = [campList[0]["key"], campList[1]["key"]] 
        self.assertEqual(expected, campaignIds)
        
        
    def testD_view_reject_update_changestate(self):
        """
        Testing the 'reject' view.
        Calls changeState function which also tests 'changestate'
            update (CouchDB) function.        
        Returns a list of requests in the 'ReadyToReject' state.
        
        """
        numRequests = 8    
        self._inject(numRequests) # creates x docs/requests
        # all currently injected requests are in the
        # "NewlyHeld" state, none in the "ReadyToReject" state 
        rejectList = self._getViewResults("reject")
        self.assertEqual(len(rejectList), 0)
        # change state, need to get docIds from CouchDB first
        allList = self._getViewResults("all")
        for allItem in allList:
            docId = allItem[u"id"]
            try:
                changeState(self.testInit.couchUrl, self.testInit.couchDbName, docId, "ReadyToReject")
            except Exception as ex:
                self.fail(ex)
        rejectList = self._getViewResults("reject")
        self.assertEqual(len(rejectList), numRequests)
         

    def testE_view_release_update_changestate(self):
        """
        Testing the 'release' view.
        Calls changeState function which also tests 'changestate'
            update (CouchDB) function.
        Returns a list of requests in the 'ReadyToRelease' state.
        
        """
        numRequests = 18    
        self._inject(numRequests) # creates x docs/requests
        # all currently injected requests are in the
        # "NewlyHeld" state, none in the "ReadyToRelease" state 
        rejectList = self._getViewResults("release")
        self.assertEqual(len(rejectList), 0)
        # change state, need to get docIds from CouchDB first
        allList = self._getViewResults("all")
        for allItem in allList:
            docId = allItem[u"id"]
            try:
                changeState(self.testInit.couchUrl, self.testInit.couchDbName, docId, "ReadyToRelease")
            except Exception as ex:
                self.fail(ex)
        rejectList = self._getViewResults("release")
        self.assertEqual(len(rejectList), numRequests)
        
        
    def testF_view_request(self):
        """
        Testing the 'request' view.
        This view allows for look up of some request details by id.
        
        """
        _, _, requestIds = self._inject(15) # creates x docs/requests
        requestView = self._getViewResults("request")
        self.assertEqual(len(requestView), 15)
        for reqView in requestView:
            self.failUnless(reqView[u"key"] in requestIds)
            self.failUnless(reqView[u"value"][u"state"] == u"NewlyHeld")

            
    def testG_view_request_id(self):
        """
        Testing the 'request_ids' view.
        'request_ids' maps couch docs to request ids.
        
        """
        self._inject(11) # creates x docs/requests
        viewResult = self._getViewResults("request_ids")
        requestIds  = [ x[u"key"] for x in viewResult ]
        self.assertEqual(len(requestIds), 11)
        
        
    def testH_view_expunge(self):
        """
        Testing the 'expunge' view.
        
        """
        self._inject(4) # creates x docs/requests
        requestView = self._getViewResults("all")
        # no "ReadyToReject" or "ReadyToRelease" request yet, everything is in "NewlyHeld"
        self.assertEqual(len(requestView), 4)
        c = 0
        for req in requestView:
            docId = req[u"value"]
            try:
                state = "ReadyToReject" if c % 2 == 0 else "ReadyToRelease"
                changeState(self.testInit.couchUrl, self.testInit.couchDbName, docId, state)
            except Exception as ex:
                self.fail(ex)
            c += 1
        expungeView = self._getViewResults("expunge")
        self.assertEqual(len(expungeView), 4)
        for req in expungeView:
            self.assertTrue(req[u"key"] in ("ReadyToReject", "ReadyToRelease"))
             

    def testI_requestStructure(self):
        """
        Pull documents for each request and check structure.
        
        """
        _, campaignIds, requestIds = self._inject(20) # creates x documents / requests
        allRequests = self._getViewResults("all")   
        for req in allRequests:
            docId = req[u"id"]
            state = req[u"key"]
            # all requests should be NewlyHeld state
            self.assertEqual(state, "NewlyHeld")
            # check that the doc is well formed and matches the data we inserted
            doc = self.couch.document(docId)    
            self.failUnless(doc[u"state"] == "NewlyHeld")
            self.failUnless(doc.has_key(u"created"))
            self.failUnless(doc.has_key(u"timestamp"))
            # description is a list of dictionaries, the first one is the initial message
            self.failUnless("Initial injection by the RequestManager" in doc[u"description"][0].values())
            self.failUnless(doc[u"request"][u"campaign_id"] in campaignIds)
            self.failUnless(doc[u'request'][u'request_id'] in requestIds)
            
            
    def testJ_update_adddescription(self):
        """
        Create a document and update function 'adddescription' handler
        to add descriptions (Ops notes) to request documents.
         
        """
        request = {"RequestName" : "testB_request", "CampaignName" : "testB_campaign"}
        OpsClipboard.inject(self.testInit.couchUrl, self.testInit.couchDbName, *[request])
        allRequests = self._getViewResults("all")
        self.assertEqual(len(allRequests), 1) # check only one request
        docId = allRequests[0][u"id"]
        # update the doc descriptions
        addDescription(self.testInit.couchUrl, self.testInit.couchDbName, docId, "NewDescription")
        doc = self.couch.document(docId)
        descriptions = doc["description"]
        # description is a list of dictionaries; each newly created request gets an
        # initial description first, so the one added just above is the second entry, index 1
        self.failUnless("NewDescription" in doc[u"description"][1].values())
class ContinuousSummaryHistogramTest(unittest.TestCase):

    def setUp(self):
        """
        _setUp_

        Setup a couch database for testing
        of produced JSON
        """

        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setupCouch("histogram_dump_t")
        random.seed()
        self.histogramDB = Database(dbname = "histogram_dump_t")

    def tearDown(self):
        """
        _tearDown_

        Clean the couch
        """
        self.testInit.tearDownCouch()

    def buildRandomNumberList(self, n, distribution = "normalvariate", **kwargs):
        """
        _buildRandomNumberList_

        Builds a list with n pseudorandomly distributed
        numbers according to some given distribution
        """
        numberList = []
        if not kwargs:
            kwargs = {"mu" : 0, "sigma" : 1}
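            # i.e. by default draw from random.normalvariate(mu=0, sigma=1),
            # a standard normal distribution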
        for _ in range(n):
            generator = getattr(random, distribution)
            numberList.append(generator(**kwargs))

        return numberList

    def testA_BasicTest(self):
        """
        _testA_BasicTest_

        Build a histogram from a set of normally
        distributed pseudorandom numbers. Check
        that the statistical properties
        of the histogram are accurate to some degree,
        that the histogram binning is done right and
        that it can become a document and be uploaded to couch
        """
        inputData = self.buildRandomNumberList(1000)

        histogram = ContinuousSummaryHistogram('TestHisto', 'MyLabel', 'SomeoneElsesLabel')

        # Populate the histogram
        for point in inputData:
            histogram.addPoint(point)

        # Get the JSON
        jsonHistogram = histogram.toJSON()

        # Check the histogram core data
        self.assertEqual(jsonHistogram["title"], "TestHisto")
        self.assertEqual(jsonHistogram["xLabel"], "MyLabel")
        self.assertAlmostEqual(jsonHistogram["average"], 0.0, places = 0)
        self.assertAlmostEqual(jsonHistogram["stdDev"], 1.0, places = 0)
        self.assertEqual(len(jsonHistogram["data"]), 16)
        self.assertTrue(jsonHistogram["continuous"])

        # Check the internal data
        self.assertEqual(jsonHistogram["internalData"]["yLabel"], "SomeoneElsesLabel")
        self.assertEqual(jsonHistogram["internalData"]["nPoints"], 1000)

        # Try to commit it to couch
        jsonHistogram["_id"] = jsonHistogram["title"]
        self.histogramDB.commitOne(jsonHistogram)

        storedJSON = self.histogramDB.document("TestHisto")
        self.assertEqual(len(storedJSON["data"]), 16)

        return

    def testB_extremeData(self):
        """
        _testB_extremeData_

        Put extreme points in the data and try to build a histogram.
        Check that it can process all this correctly
        """

        # First no data
        histogram = ContinuousSummaryHistogram('TestHisto', 'MyLabel', 'SomeoneElsesLabel')
        jsonHistogram = histogram.toJSON()
        self.assertEqual(jsonHistogram["title"], "TestHisto")
        self.assertEqual(jsonHistogram["xLabel"], "MyLabel")
        self.assertEqual(jsonHistogram["average"], 0.0)
        self.assertEqual(jsonHistogram["stdDev"], 0.0)
        self.assertEqual(len(jsonHistogram["data"]), 0)

        # Data with NaNs and Infs
        inputData = self.buildRandomNumberList(100)
        inputData.append(float('NaN'))
        inputData.append(float('Inf'))
        histogram = ContinuousSummaryHistogram('TestHisto', 'MyLabel', 'SomeoneElsesLabel')
        for point in inputData:
            histogram.addPoint(point)
        jsonHistogram = histogram.toJSON()
        self.assertAlmostEqual(jsonHistogram["average"], 0.0, places = 0)
        self.assertAlmostEqual(jsonHistogram["stdDev"], 1.0, places = 0)
        self.assertEqual(len(jsonHistogram["data"]), 7)
        self.assertEqual(jsonHistogram["internalData"]["nPoints"], 100)

        # One single point, P5
        histogram = ContinuousSummaryHistogram('TestHisto', 'MyLabel', 'SomeoneElsesLabel')
        histogram.addPoint(5)
        jsonHistogram = histogram.toJSON()
        self.assertEqual(jsonHistogram["average"], 5.0)
        self.assertEqual(jsonHistogram["stdDev"], 0.0)
        self.assertEqual(len(jsonHistogram["data"]), 1)
        self.assertEqual(jsonHistogram["data"]["5.0,5.0"], 1)
        self.assertEqual(jsonHistogram["internalData"]["nPoints"], 1)

        # Test that toJSON is idempotent
        inputData = self.buildRandomNumberList(100)
        histogram = ContinuousSummaryHistogram('TestHisto', 'MyLabel', 'SomeoneElsesLabel')
        for point in inputData:
            histogram.addPoint(point)
        jsonHistogram = histogram.toJSON()
        oldData = jsonHistogram["data"]
        jsonHistogram = histogram.toJSON()
        self.assertAlmostEqual(jsonHistogram["average"], 0.0, places = 0)
        self.assertAlmostEqual(jsonHistogram["stdDev"], 1.0, places = 0)
        self.assertEqual(len(jsonHistogram["data"]), 7)
        self.assertEqual(jsonHistogram["data"], oldData)
        self.assertEqual(jsonHistogram["internalData"]["nPoints"], 100)

        return

    def testC_compactHistogram(self):
        """
        _testC_compactHistogram_

        Check that we can create smaller histograms objects
        by chopping outliers and dropping the data altogether
        """

        # Input normally distributed data and chop anything above 1 stdev (32% of data)
        histogram = ContinuousSummaryHistogram('TestHisto', 'MyLabel', 'SomeoneElsesLabel',
                                               dropOutliers = True, sigmaLimit = 1)
        inputData = self.buildRandomNumberList(1000)
        for point in inputData:
            histogram.addPoint(point)
        jsonHistogram = histogram.toJSON()
        self.assertAlmostEqual(jsonHistogram["average"], 0.0, places = 0)
        self.assertAlmostEqual(jsonHistogram["stdDev"], 1.0, places = 0)
        self.assertEqual(len(jsonHistogram["data"]), 16)
        self.assertEqual(jsonHistogram["internalData"]["nPoints"], 1000)
        pointsInHistogram = sum([x for x in viewvalues(jsonHistogram["data"])])

        # With high probability we must have chopped at least one point
        self.assertTrue(pointsInHistogram < 1000)
        self.assertAlmostEqual(pointsInHistogram / 1000.0, 0.68, places = 1)

        # Create a histogram without histogram data
        histogram = ContinuousSummaryHistogram('TestHisto', 'MyLabel', 'SomeoneElsesLabel',
                                               storeHistogram = False)
        inputData = self.buildRandomNumberList(1000)
        for point in inputData:
            histogram.addPoint(point)
        jsonHistogram = histogram.toJSON()
        self.assertAlmostEqual(jsonHistogram["average"], 0.0, places = 0)
        self.assertAlmostEqual(jsonHistogram["stdDev"], 1.0, places = 0)
        self.assertEqual(len(jsonHistogram["data"]), 0)
        self.assertEqual(jsonHistogram["internalData"]["nPoints"], 1000)

        return
Example #16
class database:
    logger = logfactory

    class DatabaseNotFoundException(Exception):
        def __init__(self,  db=''):
            self.db = str(db)
            database.logger.error('Database "%s" was not found.' % (self.db), level='critical')

        def __str__(self):
            return 'Error: Database "%s" was not found.' % self.db

    class DatabaseAccessError(Exception):
        def __init__(self,  db=''):
            self.db = str(db)
            database.logger.error('Could not access database "%s".' % (self.db), level='critical')

        def __str__(self):
            return 'Error: Could not access database "%s".' % self.db

    class DocumentNotFoundException(Exception):
        def __init__(self,  name=''):
            self.name = name
            database.logger.error('Document "%s" was not found.' % (self.name))

        def __str__(self):
            return 'Error: Document "%s" was not found.' % self.name

    class MapReduceSyntaxError(Exception):
        def __init__(self,  query=''):
            self.query = query
            database.logger.error('Invalid query <%s>' % (self.query))

        def __str__(self):
            return 'Error: Invalid query "' + self.query + '"'

    class InvalidOperatorError(Exception):
        def __init__(self,  op=''):
            self.op = str(op)
        def __str__(self):
            return 'Error: Operator "' + self.op + '" is invalid.'
    class InvalidParameterError(Exception):
        def __init__(self,  param=''):
            self.param = str(param)
        def __str__(self):
            return 'Error: Invalid Parameter: ' + self.param

    cache_dictionary = defaultdict(lambda: None)

    def __init__(self,  db_name='',url=None, cache=False):
        host = os.environ['HOSTNAME'] 
        if url is None:
            url = locator().dbLocation()
        #self.logger.log('I chose the url %s'%(url))
        if not db_name:
            raise self.DatabaseNotFoundException(db_name)
        self.db_name = db_name
        self.cache = cache
        if self.db_name in ['campaigns','chained_campaigns']:
            ## force cache for those.
            self.cache=True

        try:    
            self.db = Database(db_name, url=url)
            #            self.db = Database(db_name, url='http://preptest.cern.ch:5984/')
            #            self.db = Database(db_name) # for using private DB @localhost:5984
        except ValueError as ex:
            raise self.DatabaseAccessError(db_name)
            
        self.allowed_operators = ['<=',  '<',  '>=',  '>',  '==',  '~=']

    def __is_number(self, s):
        try:
            float(s)
            return True
        except ValueError:
            return False
       
    def get(self,  prepid=''):
        if self.cache:
            result = self.__get_from_cache(prepid)
            if result: return result

        self.logger.log('Looking for document "%s" in "%s"...' % (prepid,self.db_name))
        try:
            doc = self.db.document(id=prepid)
            if self.cache:
                self.__save_to_cache( prepid, doc)
            return doc
        except Exception as ex:
            self.logger.error('Document "%s" was not found. Reason: %s' % (prepid, ex))
            return {}

    def __save_to_cache(self, key, value):
        from tools.locker import locker
        with locker.lock(key):
            self.cache_dictionary[key]=value

    def __get_from_cache(self, key):
        from tools.locker import locker
        with locker.lock(key):
            return self.cache_dictionary[key]

    def __document_exists(self,  doc):
        if not doc:
            self.logger.error('Trying to locate empty string.', level='warning')
            return False
        if '_id' in doc:
            id = doc['_id']
        elif 'prepid' in doc:
            id = doc['prepid']
        else:
            self.logger.error('Document does not have an "_id" parameter.', level='critical')
            return False
        return self.__id_exists(prepid=id)

    def document_exists(self, prepid=''):
        self.logger.log('Checking existence of document "%s" in "%s"...' % (prepid, self.db_name))
        return self.__id_exists(prepid)
    
    def __id_exists(self,  prepid=''):
        try:
            if self.cache and self.__get_from_cache(prepid) or self.db.documentExists(id=prepid):
                return True
            self.logger.error('Document "%s" does not exist.' % (prepid))
            return False  
        except CouchError as ex:
            self.logger.error('Document "%s" was not found (CouchError). Reason: %s. Trying a second time after a short wait' % (prepid, ex))
            time.sleep(0.5)
            return self.__id_exists(prepid)
        except Exception as ex:
            self.logger.error('Document "%s" was not found. Reason: %s' % (prepid, ex))
            return False
    
    def delete(self, prepid=''):
        if not prepid:
            return False
        if not self.__id_exists(prepid):
            return False

        self.logger.log('Trying to delete document "%s"...' % (prepid))
        try:
            self.db.delete_doc(id=prepid)
            if self.cache:
                self.__save_to_cache(prepid, None)

            return True
        except Exception as ex:
            self.logger.error('Could not delete document: %s . Reason: %s ' % (prepid, ex))
            return False            

    def update(self,  doc={}):
        if '_id' in doc:
            self.logger.log('Updating document "%s" in "%s"' % (doc['_id'],self.db_name))
        if self.__document_exists(doc):
            if self.cache:
                ##JR the revision in the cache is not the one in the DB at this point
                # will be retaken at next get
                self.__save_to_cache(doc['_id'], None)
            return self.save(doc)
        self.logger.error('Failed to update document: %s' % (json.dumps(doc)))         
        return False
        
    def update_all(self,  docs=[]):
        if not docs:
            return False
            
        for doc in docs:
            if self.__document_exists(doc):
                self.db.queue(doc)
        try:
            self.db.commit()
            return True
        except Exception as ex:
            self.logger.error('Could not commit changes to database. Reason: %s' % (ex))
            return False        
        
    def get_all(self, page_num=-1): 
        try:
            limit, skip = self.__pagify(page_num)
            if limit >= 0 and skip >= 0: 
                result = self.db.loadView(self.db_name, "all", options={'limit':limit,'skip':skip, 'include_docs':True})['rows']
                res = map(lambda r : r['doc'], result)
                return res
            result = self.db.loadView(self.db_name, "all",options={'include_docs':True})['rows']
            res = map(lambda r : r['doc'], result)
            return res
        except Exception as ex:
            self.logger.error('Could not access view. Reason: %s' % (ex))
            return []

    
    def query(self,  query='', page_num=0):
        if not query:
            result = self.get_all(page_num)
            #res =  map(lambda r : r['doc'], result)
            return result
        try:
            result = self.__query(query, page=page_num)
            #res =  map(lambda r : r['doc'], result)
            return result
        except Exception as ex:
            self.logger.error('Could not load view for query: <%s> . Reason: %s' % (query, ex))
            return []

    def unique_res(self,query_result):
        docids = map(lambda doc : doc['_id'] , query_result)
        docids_s = list(set(docids))
        if len(docids) != len(docids_s):
            docids_s = []
            return_dict= copy.deepcopy( query_result )
            for doc in query_result:
                if not doc['_id'] in docids_s:
                    docids_s.append(doc['_id'])
                else:
                    return_dict.remove(doc)		
            return return_dict
        return query_result

    def queries( self, query_list):
        ## page_num does not matter
        if not len(query_list):
            return self.get_all(page_num=-1)
        try:

            results_list=[]
            ##make each query separately and retrieve only the doc with counting == len(query_list)
            for (i,query_item) in enumerate(query_list):
                res = self.query(query_item, page_num=-1)
                query_result = self.unique_res( res )
                if i!=0:
                    ## get only the one already in the intersection
                    id_list = map(lambda doc : doc['_id'], results_list)
                    results_list = filter(lambda doc : doc['_id'] in id_list, query_result)
                else:
                    results_list= query_result
            return results_list
        except Exception as ex:
            self.logger.error('Could not load view for queries: <%s> . Reason: %s' % ('<br>'.join(query_list), ex))
            return []

    def __extract_operators(self,  query=''):

        if not query:
            self.logger.error('Empty query', level='warning')
            return ()
        clean = []
        tokens = []
        for op in self.allowed_operators:
            if op in query:
                tokens = query.rsplit(op)
                tokens.insert(1,  op)
            else:
                continue
            for tok in tokens:
                if len(tok) < 1:
                    continue
                clean.append(tok.strip().strip('"'))
            if len(clean) != 3:
                raise self.MapReduceSyntaxError(query)
            #if clean[0] not in self.request and clean[1] not in self.campaign:
            #    raise self.IllegalParameterError(clean[0])
            return clean
        raise self.MapReduceSyntaxError(query)
    
    def __pagify(self, page_num=0, limit=20):
        if page_num < 0:
            return -1,0
        skip = limit*page_num
        return limit, skip      
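        # e.g. __pagify(page_num=2) returns (20, 40): fetch 20 docs after skipping
        # the first 40; __pagify(page_num=-1) returns (-1, 0), which get_all()
        # interprets as "no paging, load everything"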
    
    def __execute_query(self, tokenized_query='', page=-1, limit=20):
        tokens = []
        try:
            tokens = self.__extract_operators(tokenized_query)
        except Exception as ex:
            self.logger.error('Could not parse query. Reason: %s' % (ex))
            return []
        if tokens:
            view_name, view_opts = self.__build_query(tokens)
            if not view_name or not view_opts:
                return []
            if page > -1:
                view_opts['limit'] = limit
                view_opts['skip'] = page * limit
            view_opts['include_docs'] = True
            result = self.db.loadView(self.db_name, view_name, options=view_opts)['rows']
            res = map(lambda r: r['doc'], result)
            return res
        else:
            return []
    
    def raw_query(self,  view_name,  options={}):
        self.logger.error('Executing raw query to the database. Accessed view: %s' % (view_name), level='warning') 
        return self.db.loadView(self.db_name,  view_name,  options)['rows']
                
    def __get_op(self, oper):
        if oper == '>':
            return lambda x,y: x > y
        elif oper == '>=':
            return lambda x,y: x >= y
        elif oper == '<':
            return lambda x,y: x < y
        elif oper == '<=':
            return lambda x,y: x <= y
        elif oper == '==':
            return lambda x,y: x == y       
        else:
            return None     
        
    def __filter(self, tokenized_query=[], view_results=[]):
        if len(tokenized_query) != 3:
            return view_results
        prn = tokenized_query[0]
        op = tokenized_query[1]
        if self.__is_number(tokenized_query[2]):
            val = float(tokenized_query[2])
        else:
            val = tokenized_query[2]
        f = self.__get_op(op)
        return filter(lambda x: f(x[prn],val), view_results)    
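        # e.g. __filter(['total_events', '>=', '10000'], rows) keeps only the docs
        # whose 'total_events' field is >= 10000 (field name hypothetical)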

    def __query(self, query='', page=0, limit=20):
        t_par = []
        results = []
        #what is that , split for ???
        #if ',' in query:
        #     t_par = query.rsplit(',')
        if not t_par:
             t_par = [query]
        if len(t_par) == 1:          
            return self.__execute_query(t_par[0], page, limit)#[page*limit:page*limit+limit]
        elif len(t_par) == 0:
            return []

        #temp = self.__execute_query(t_par[0])#[page*limit:page*limit+limit]
        res = self.__execute_query(t_par[0])
        #res = map(lambda x: x['value'], temp) 
        if len(res) == 0:
            return []
        for i in range(1,len(t_par)):
            tq = self.__extract_operators(t_par[i])
            res = self.__filter(tq, res)
        #return map(lambda x: {'value':x},res[page*limit:page*limit+20])
        return res[page*limit:page*limit+20]
                    
    def __build_query(self,tokens=[]):
        if not tokens:
            return None,None
        if len(tokens) != 3:
            raise self.MapReduceSyntaxError(tokens)
        param = tokens[0]
        op = tokens[1]     
        kval = tokens[2]
        try:
            view_opts = self.__build_options(op, kval)
        except Exception as ex:
            self.logger.error('Value types are not compatible with operator %s value %s Error: %s' % (op, kval, str(ex))) 
            return None,None
        return param, view_opts
    
    def __build_options(self,op, val):
        def is_number(s):
            try:
                float(s)
                return True
            except ValueError:
                return False
        
        # options dictionary
        opts = {} 
        
        # default the composite key search
        #if '[' in val and ']' in val:
        if val.startswith('[') and val.endswith(']'):
            if op == '==':
                try:                    
                    e=ast.literal_eval(val)
                    opts['key'] = e
                except:
                    opts['key'] = val
            return opts
        
        # handle alphanumeric key ranges
        num_flag = False
        if is_number(val):
            num_flag = True
            kval = float(val)
        else:
            kval = val.decode('ascii')
        if '>' in op:
            if '=' in op:
                opts['startkey']=kval
            else:
                if num_flag:
                    opts['startkey']=kval+1
                else:
                    opts['startkey']=kval
            if num_flag:
                opts['endkey']=99999999 # assume its numeric
            else:
                opts['endkey']=kval+u'\u9999'
        elif '<' in op:
            if '=' in op:
                opts['endkey']=kval
            else:
                if num_flag:
                    opts['endkey']=kval-1
                else:
                    opts['endkey']=kval
            if num_flag:
                opts['startkey']=-99999999
            else:
                opts['startkey']=''
                
        elif '==' == op:
            opts['key']=kval
        elif '~=' == op:
            if kval[-1] == '*':
                opts['startkey']=kval[:len(kval)-1]
                opts['endkey']=kval[:len(kval)-1]+u'\u9999'#'99999999'#'\u9999'
        return opts
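        # e.g. (hypothetical prepid value): the query string 'prepid>=HIG-2013' is split
        # by __extract_operators into ['prepid', '>=', 'HIG-2013'], and __build_options
        # then returns {'startkey': u'HIG-2013', 'endkey': u'HIG-2013\u9999'} so the
        # 'prepid' view can be queried as an open-ended key range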
            
  
    def save_all(self,  docs=[]):
        if not docs:
            return False
        for doc in docs:
            self.db.queue(doc)
        try:
            self.db.commit()
            return True
        except Exception as ex:
            self.logger.error('Could not commit changes to database. Reason: %s' % (ex)) 
            return False

    def save(self, doc={}):
        if not doc:
            self.logger.error('Tried to save empty document.', level='warning')
            return False


        # TODO: Check if an object exists in the database and fail.

        #if '_id' in doc:
        #    self.logger.log('Using user-defined id: %s' % (doc['_id']))
        #if self.__document_exists(doc):
        #    self.logger.error('Failed to update document: %s' % (json.dumps(doc)))
        #    return False

        try:
            #self.logger.error('Document is %s %s'%(doc['_id'],doc))
            #self.logger.error(self.db.commitOne(doc))
            ## this is a change I just made (23/05/2013 13:31) because of the return value of update should be True/False
            saved = self.db.commitOne(doc)
            if 'error' in saved[0]:
                self.logger.error('Commit One says : %s'%(saved))
                return False
            else:
                return True
        except Exception as ex:
            self.logger.error('Could not commit changes to database. Reason: %s' % (ex))
            return False

    def count(self):
        try:
            return len(self.db.allDocs()) 
        except Exception as ex:
            self.logger.error('Could not count documents in database. Reason: %s' % (ex))
            return -1 
def main():
    """
    It will either delete docs in couchdb for the workflow you
    have provided or it will loop over the final (or almost final)
    states and ask for your permission to delete them.
    """
    wfName = sys.argv[1] if len(sys.argv) == 2 else []

    if 'WMAGENT_CONFIG' not in os.environ:
        os.environ[
            'WMAGENT_CONFIG'] = '/data/srv/wmagent/current/config/wmagent/config.py'

    config = loadConfigurationFile(os.environ["WMAGENT_CONFIG"])

    # Instantiating central services (couch stuff)
    #    print "Central Couch URL  : %s" % config.WorkloadSummary.couchurl
    #    print "Central ReqMgr URL  : %s\n" % config.AnalyticsDataCollector.centralRequestDBURL

    wfDBReader = RequestDBReader(
        config.AnalyticsDataCollector.centralRequestDBURL,
        couchapp=config.AnalyticsDataCollector.RequestCouchApp)

    # Central services
    wqBackend = WorkQueueBackend(config.WorkloadSummary.couchurl)
    wqInboxDB = Database('workqueue_inbox', config.WorkloadSummary.couchurl)

    # Local services
    localWQBackend = WorkQueueBackend(config.WorkQueueManager.couchurl,
                                      db_name="workqueue_inbox")
    localWQInboxDB = Database('workqueue', config.WorkQueueManager.couchurl)

    statusList = [
        "failed", "epic-FAILED", "completed", "closed-out", "announced",
        "aborted", "aborted-completed", "rejected", "normal-archived",
        "aborted-archived", "rejected-archived"
    ]

    # loop over the final (or almost final) states defined above
    for stat in statusList:
        # retrieve list of workflows in each status
        if not wfName:
            #            options = {'include_docs': False}
            date_range = {
                'startkey': [2015, 5, 15, 0, 0, 0],
                'endkey': [2015, 5, 26, 0, 0, 0]
            }
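            # hypothetical date window; the 'bydate' view keys appear to be
            # [year, month, day, hour, minute, second]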
            #            finalWfs = wfDBReader.getRequestByCouchView("bydate", options, date_range)
            tempWfs = wfDBReader.getRequestByCouchView("bydate", date_range)
            #print "Found %d wfs in status: %s" %(len(finalWfs), stat)
            finalWfs = []
            for wf, content in tempWfs.iteritems():
                if content['RequestStatus'] in statusList:
                    finalWfs.append(wf)
            print "Found %d wfs not in an active state" % len(finalWfs)
        else:
            finalWfs = [wfName]
            tempWfs = wfDBReader.getRequestByNames(wfName, True)
            print "Checking %s with status '%s'." % (
                wfName, tempWfs[wfName]['RequestStatus'])

        wqDocs, wqInboxDocs = [], []
        localWQDocs, localWQInboxDocs = [], []
        wqInboxDoc, wqDocIDs = None, []  # ensure these exist even if nothing matches below
        for counter, wf in enumerate(finalWfs):
            if counter % 100 == 0:
                print "%d wfs queried ..." % counter
            # check whether there are workqueue docs
            wqDocIDs = wqBackend.getElements(WorkflowName=wf)
            if wqDocIDs:
                print "Found %d workqueue docs for %s, status %s" % (
                    len(wqDocIDs), wf, tempWfs[wf]['RequestStatus'])
                print wqDocIDs
                wqDocs.append(wqDocIDs)

            # check whether there are workqueue_inbox docs
            if wqInboxDB.documentExists(wf):
                print "Found workqueue_inbox doc for %s, status %s" % (
                    wf, tempWfs[wf]['RequestStatus'])
                # then retrieve the document
                wqInboxDoc = wqInboxDB.document(wf)
                wqInboxDocs.append(wqInboxDoc)

            # check local queue
            localWQDocIDs = localWQBackend.getElements(WorkflowName=wf)
            if localWQDocIDs:
                print "Found %d local workqueue docs for %s, status %s" % (
                    len(localWQDocIDs), wf, tempWfs[wf]['RequestStatus'])
                print localWQDocIDs
                localWQDocs.append(localWQDocIDs)
            if localWQInboxDB.documentExists(wf):
                print "Found local workqueue_inbox doc for %s, status %s" % (
                    wf, tempWfs[wf]['RequestStatus'])
                localWQInboxDoc = localWQInboxDB.document(wf)
                print localWQInboxDoc
                localWQInboxDocs.append(localWQInboxDoc)

    # TODO TODO TODO for the moment only deletes for a specific workflow
    if wfName:
        var = raw_input("\nCan we delete all these documents (Y/N)? ")
        if var == "Y":
            # deletes workqueue_inbox doc
            if wqInboxDoc:
                print "Deleting workqueue_inbox id %s and %s" % (
                    wqInboxDoc['_id'], wqInboxDoc['_rev'])
                wqInboxDB.delete_doc(wqInboxDoc['_id'], wqInboxDoc['_rev'])

            # deletes workqueue docs
            if wqDocIDs:
                print "Deleting workqueue docs %s" % wqDocIDs
                wqBackend.deleteElements(
                    *[x for x in wqDocIDs if x['RequestName'] in wfName])
        else:
            print "You are the boss, aborting it ...\n"
class ContinuousSummaryHistogramTest(unittest.TestCase):

    def setUp(self):
        """
        _setUp_

        Setup a couch database for testing
        of produced JSON
        """

        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setupCouch("histogram_dump_t")
        random.seed()
        self.histogramDB = Database(dbname = "histogram_dump_t")

    def tearDown(self):
        """
        _tearDown_

        Clean the couch
        """
        self.testInit.tearDownCouch()

    def buildRandomNumberList(self, n, distribution = "normalvariate", **kwargs):
        """
        _buildRandomNumberList_

        Builds a list with n pseudorandomly distributed
        numbers according to some given distribution
        """
        numberList = []
        if not kwargs:
            kwargs = {"mu" : 0, "sigma" : 1}
        for _ in range(n):
            generator = getattr(random, distribution)
            numberList.append(generator(**kwargs))

        return numberList
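
    # A minimal usage sketch, not part of the original tests: the helper above can draw
    # from any distribution in the standard "random" module, e.g. 500 numbers uniformly
    # distributed in [0, 10):
    #     uniformData = self.buildRandomNumberList(500, distribution="uniform", a=0, b=10)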

    def testA_BasicTest(self):
        """
        _testA_BasicTest_

        Build a histogram from a set of uniformly
        distributed pseudorandom numbers. Check
        that the statistic properties
        in the histogram are accurate to some degree,
        that the histogram binning is done right and
        that this can become a document and be uploaded to couch
        """
        inputData = self.buildRandomNumberList(1000)

        histogram = ContinuousSummaryHistogram('TestHisto', 'MyLabel', 'SomeoneElsesLabel')

        # Populate the histogram
        for point in inputData:
            histogram.addPoint(point)

        # Get the JSON
        jsonHistogram = histogram.toJSON()

        # Check the histogram core data
        self.assertEqual(jsonHistogram["title"], "TestHisto")
        self.assertEqual(jsonHistogram["xLabel"], "MyLabel")
        self.assertAlmostEqual(jsonHistogram["average"], 0.0, places = 0)
        self.assertAlmostEqual(jsonHistogram["stdDev"], 1.0, places = 0)
        self.assertEqual(len(jsonHistogram["data"]), 16)
        self.assertTrue(jsonHistogram["continuous"])

        # Check the internal data
        self.assertEqual(jsonHistogram["internalData"]["yLabel"], "SomeoneElsesLabel")
        self.assertEqual(jsonHistogram["internalData"]["nPoints"], 1000)

        # Try to commit it to couch
        jsonHistogram["_id"] = jsonHistogram["title"]
        self.histogramDB.commitOne(jsonHistogram)

        storedJSON = self.histogramDB.document("TestHisto")
        self.assertEqual(len(storedJSON["data"]), 16)

        return

    def testB_extremeData(self):
        """
        _testB_extremeData_

        Put extreme points in the data and try to build a histogram.
        Check that it can process all this correctly
        """

        # First no data
        histogram = ContinuousSummaryHistogram('TestHisto', 'MyLabel', 'SomeoneElsesLabel')
        jsonHistogram = histogram.toJSON()
        self.assertEqual(jsonHistogram["title"], "TestHisto")
        self.assertEqual(jsonHistogram["xLabel"], "MyLabel")
        self.assertEqual(jsonHistogram["average"], 0.0)
        self.assertEqual(jsonHistogram["stdDev"], 0.0)
        self.assertEqual(len(jsonHistogram["data"]), 0)

        # Data with NaNs and Infs
        inputData = self.buildRandomNumberList(100)
        inputData.append(float('NaN'))
        inputData.append(float('Inf'))
        histogram = ContinuousSummaryHistogram('TestHisto', 'MyLabel', 'SomeoneElsesLabel')
        for point in inputData:
            histogram.addPoint(point)
        jsonHistogram = histogram.toJSON()
        self.assertAlmostEqual(jsonHistogram["average"], 0.0, places = 0)
        self.assertAlmostEqual(jsonHistogram["stdDev"], 1.0, places = 0)
        self.assertEqual(len(jsonHistogram["data"]), 7)
        self.assertEqual(jsonHistogram["internalData"]["nPoints"], 100)

        # One single point, P5
        histogram = ContinuousSummaryHistogram('TestHisto', 'MyLabel', 'SomeoneElsesLabel')
        histogram.addPoint(5)
        jsonHistogram = histogram.toJSON()
        self.assertEqual(jsonHistogram["average"], 5.0)
        self.assertEqual(jsonHistogram["stdDev"], 0.0)
        self.assertEqual(len(jsonHistogram["data"]), 1)
        self.assertEqual(jsonHistogram["data"]["5.0,5.0"], 1)
        self.assertEqual(jsonHistogram["internalData"]["nPoints"], 1)

        # Test that toJSON is idempotent
        inputData = self.buildRandomNumberList(100)
        histogram = ContinuousSummaryHistogram('TestHisto', 'MyLabel', 'SomeoneElsesLabel')
        for point in inputData:
            histogram.addPoint(point)
        jsonHistogram = histogram.toJSON()
        oldData = jsonHistogram["data"]
        jsonHistogram = histogram.toJSON()
        self.assertAlmostEqual(jsonHistogram["average"], 0.0, places = 0)
        self.assertAlmostEqual(jsonHistogram["stdDev"], 1.0, places = 0)
        self.assertEqual(len(jsonHistogram["data"]), 7)
        self.assertEqual(jsonHistogram["data"], oldData)
        self.assertEqual(jsonHistogram["internalData"]["nPoints"], 100)

        return

    def testC_compactHistogram(self):
        """
        _testC_compactHistogram_

        Check that we can create smaller histograms objects
        by chopping outliers and dropping the data altogether
        """

        # Input normally distributed data and chop anything above 1 stdev (32% of data)
        histogram = ContinuousSummaryHistogram('TestHisto', 'MyLabel', 'SomeoneElsesLabel',
                                               dropOutliers = True, sigmaLimit = 1)
        inputData = self.buildRandomNumberList(1000)
        for point in inputData:
            histogram.addPoint(point)
        jsonHistogram = histogram.toJSON()
        self.assertAlmostEqual(jsonHistogram["average"], 0.0, places = 0)
        self.assertAlmostEqual(jsonHistogram["stdDev"], 1.0, places = 0)
        self.assertEqual(len(jsonHistogram["data"]), 16)
        self.assertEqual(jsonHistogram["internalData"]["nPoints"], 1000)
        pointsInHistogram = sum(jsonHistogram["data"].values())

        # With high probability we must have chopped at least one point
        self.assertTrue(pointsInHistogram < 1000)
        self.assertAlmostEqual(pointsInHistogram / 1000.0, 0.68, places = 1)

        # Create a histogram without histogram data
        histogram = ContinuousSummaryHistogram('TestHisto', 'MyLabel', 'SomeoneElsesLabel',
                                               storeHistogram = False)
        inputData = self.buildRandomNumberList(1000)
        for point in inputData:
            histogram.addPoint(point)
        jsonHistogram = histogram.toJSON()
        self.assertAlmostEqual(jsonHistogram["average"], 0.0, places = 0)
        self.assertAlmostEqual(jsonHistogram["stdDev"], 1.0, places = 0)
        self.assertEqual(len(jsonHistogram["data"]), 0)
        self.assertEqual(jsonHistogram["internalData"]["nPoints"], 1000)

        return
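
A brief standalone sketch of the histogram API exercised by the tests above; only the constructor arguments and methods that appear in the tests (addPoint, toJSON, dropOutliers, sigmaLimit) are assumed, and the variable names are illustrative:

#   histo = ContinuousSummaryHistogram('JobTime', 'Seconds', 'Jobs',
#                                      dropOutliers=True, sigmaLimit=3)
#   for value in jobTimes:            # jobTimes: any iterable of numbers
#       histo.addPoint(value)
#   summaryDoc = histo.toJSON()       # plain dict, ready to be committed to CouchDB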
Example #19
File: API.py, Project: dmwm/DQIS
class TestDQISResult(unittest.TestCase):
    DB_NAME = 'dqis_test'
    DB_URL = 'localhost:5984'

    def setUp(self):        
        couch = CouchServer(dburl=self.DB_URL)
        if self.DB_NAME in couch.listDatabases():
            couch.deleteDatabase(self.DB_NAME)
        
        cdb = couch.connectDatabase(self.DB_NAME)

        #for dq_t in test_data.demo_data:
        #    cdb.queue(dq_t)
        
        cdb.commit()
        
        self.db = Database(dbname=self.DB_NAME)
        
    
    def test_init(self):        
        #self.assertEqual(1,2)
        pass

    def test_save_and_delete(self):
        #Should the document get a revision number after save?
        #A document cannot be saved and then deleted, because save() does not return a DQISResult object!
        
        #Tests document saving 
        document = {"_id": "abc", "test":"data"}
        r = API.DQISResult(dqis_db = self.db, dict = document)
        all_docs_count_before = len(self.db.allDocs()['rows'])
        r.save()
        all_docs_count_after_insert = len(self.db.allDocs()['rows'])        
        
        self.assertEqual(all_docs_count_before +1, all_docs_count_after_insert)
        
        
        #Test delete
        doc = self.db.document("abc")
        r = API.DQISResult(dict=doc, dqis_db = self.db)
        self.assertEqual(doc["test"], "data")
        r.delete()
        self.db.commitOne(r)
        all_docs_count_after_deleting = len(self.db.allDocs()['rows']) 
        self.assertEqual(all_docs_count_before, all_docs_count_after_deleting )
        
    def test_savable(self):
        #Does a missing ID have to raise an exception?
        rez = API.DQISResult(dict = {'_id': "123"})._require_savable()
        self.assertEqual(rez, None)
        self.assertRaises(DQISResultNotSavable, 
                    API.DQISResult(dict = {'id': "123"})._require_savable )
        self.assertRaises(DQISResultNotSavable, 
                    API.DQISResult(dict = {'abc': "123"})._require_savable )
        
    def test_find_id_basic(self): #similar to test_savable
        self.assertEqual(DQISResult()._find_id(), "")
        self.assertEqual(DQISResult(dict = {'id': "123"})._find_id(), "123")
        self.assertEqual(DQISResult(dict = {'_id': "123"})._find_id(), "123") 
        
    def test_find_id(self):
        id1 = API.DQISResult()._find_id()
        id2 = API.DQISResult(dict = {'id': "123"})._find_id()
        id3 = API.DQISResult(dict = {'_id': "abc"})._find_id()
        self.assertEqual(id1, "")
        self.assertEqual(id2, '123')
        self.assertEqual(id3, 'abc')
        
    def test_require_saveable(self):
        dr1 = API.DQISResult()._require_savable
        #dr2 = API.DQISResult(dict = {'_id': "123"})._require_savable
        self.assertRaises(DQISResultNotSavable, dr1)
        #self.assertEqual(None, dr2())
        
    def test_save_to_queue(self):
        r = DQISResult(dqis_db = Database(), dict = {"_id": "abc"})
        queue_size_before = len(r.dqis_db._queue)
        r.saveToQueue()
        queue_size_after = len(r.dqis_db._queue) 
        self.assertEqual(queue_size_before, 0)
        self.assertEqual(queue_size_after, 1)
        r.dqis_db._reset_queue()
        
        
    def test_require_db(self):
        f = DQISResult()._require_db_connection
        self.assertRaises(DatabaseNotSetException, f)  
        
        f = DQISResult(dqis_db = "dqis_db")._require_db_connection
        self.assertRaises(DatabaseNotSetException, f)  
        
        f = DQISResult(dqis_db = Database())._require_db_connection
        self.assertEqual(None, f())



    def test_get_document(self):
        doc_id = '100215-0-38bc1d29bd22844103e86f9a000500e2' 
        r = API.DQISResult(API.Database(dbname="dqis"))
        r['id'] = doc_id
        doc = r.get_document()
        self.assertEqual(doc.run, 100215)
        doc_id = '' 
        r = DQISResult(Database(dbname="dqis"))
        r['id'] = doc_id
        fdoc = r.get_document 
        fdoc()
        self.assertRaises(DQISResultNotSavable, fdoc) # because get and s
def main():
    
    globalwq = Database('workloadsummary_testdisplay', 'https://dballesteros.iriscouch.com')
    x = globalwq.document("an_id")
    print x['performance']['/linacre_ACDC2_ReReco13JulHT_120723_102457_7693_120810_203338_8896/DataProcessing']['cmsRun1']
def main():
    myDB = Database('workqueue_inbox', 'https://vocms169.cern.ch/couchdb')
    document = myDB.document(sys.argv[1])
    inputs = document['WMCore.WorkQueue.DataStructs.WorkQueueElement.WorkQueueElement']['Inputs']
    for block in inputs:
        print '                           "%s",' % block
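
A hedged invocation sketch for the snippet above; the script name is an assumption:

#   python dumpInboxInputs.py <workqueue_inbox_document_id>
# Prints the input block names of the given workqueue_inbox element, quoted and
# comma-terminated so they can be pasted straight into a Python list.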
def main():
    
    parser = OptionParser()
    parser.add_option("-f", "--input-acdc", dest="acdcList")
    parser.add_option("-m", "--input-mapfile", dest="mapFile")
    parser.add_option("-u", "--url", dest="url")
    parser.add_option("-d", "--dry-run", dest="dryRun",
                      action="store_true", default=False)
    parser.add_option("-l", "--log-file", dest="logFile")

    (options, _) = parser.parse_args()
    
    handle = open(options.logFile, 'w')
    
    url = options.url
    database = 'wmagent_acdc'
    acdcDB = Database(database, url)
    handle.write('Opening ACDC database in %s/%s\n' % (url, database))
    
    inputACDC = readACDCInput(options.acdcList)
    usersMap = readUsersMap(options.mapFile)
    handle.write('Have %d workflows to fix\n' % len(inputACDC))
    handle.write('=================================================================\n')
    for workflow in inputACDC:
        collection_name = workflow['collection_name']
        fileset_name = workflow['fileset_name']
        original_dn = workflow['original_dn']
        handle.write('Original workflow: %s\n' % collection_name)
        handle.write('Original task: %s\n' % fileset_name)
        handle.write('Original owner DN: %s\n' % original_dn)
        if original_dn in usersMap:
            handle.write('This DN maps to %s-%s\n' % (usersMap[original_dn][1], usersMap[original_dn][0]))
        else:
            handle.write('The original DN can not be found in the map file, skipping the workflow\n')
            continue
        params = {'reduce' : False,
                  'key' : [usersMap[original_dn][1], usersMap[original_dn][0], collection_name, fileset_name]}
        result = acdcDB.loadView('ACDC', 'owner_coll_fileset_docs', params)
    
        rows = result['rows']
        docIds = [x['id'] for x in rows]
        handle.write('Found %d documents to change\n' % len(rows))
        handle.write('Changing from %s-%s to %s-%s\n' % (usersMap[original_dn][1], usersMap[original_dn][0],
                                                       workflow['group'], workflow['owner']))

        for docId in docIds:
            doc = acdcDB.document(docId)
            doc['owner'] = {'group' : workflow['group'], 'user' : workflow['owner']}
            if not options.dryRun:
                acdcDB.queue(doc)
        if not options.dryRun:
            response = acdcDB.commit()
        else:
            response = 'This is a dry-run no changes were made'
        
        handle.write('Response to write operation: %s\n'% str(response))
        handle.write('Response length: %d\n' % len(response))
        handle.write('=================================================================\n')
    
    handle.write('Finished script')
    handle.close()
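
A hedged invocation sketch for the ACDC ownership-fixing script above; the script and input file names are assumptions, while the option flags are the ones its OptionParser defines:

#   python fixACDCOwnership.py -f acdc_list.json -m users_map.json \
#       -u https://cmsweb.cern.ch/couchdb -l fix_acdc.log --dry-run
# Drop --dry-run to actually queue and commit the ownership changes.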
def swapLocations(options):
    #Initialize stuff
    phedexAPI = PhEDEx({'cachepath' : options.cachepath})
    acdcCouch = Database('wmagent_acdc', options.acdcUrl)

    #Let's get the IDs of the ACDC documents for the task/request/group/user
    array = [options.group, options.user, options.request, options.task]
    result = acdcCouch.loadView('ACDC', 'owner_coll_fileset_docs', {'reduce' : False}, [array])

    documentsIDs = [x['id'] for x in result['rows']]

    #Load the map file saying what we want to change of location
    mapFile = open(options.map, 'r')
    locationMap = json.load(mapFile)
    mapFile.close()

    #Go through the documents
    for docID in documentsIDs:
        doc = acdcCouch.document(docID)

        #Are we going to change this doc? Better back it up
        if options.change:
            backupFile = open(os.path.join(options.backup, "%s.bkp" % doc["_id"]), 'w')
            json.dump(doc, backupFile)
            backupFile.close()

        #Go through the files
        files = doc["files"]
        for inputFile in files:

            #Use PhEDEx API to get site based on the SE
            #Then map that to the desired target
            se = files[inputFile]["locations"][0]
            siteLocation = phedexAPI.getBestNodeName(se)
            targetLocation = locationMap.get(siteLocation, siteLocation)

            if siteLocation == targetLocation:
                #Nothing to do with this one, move on
                continue

            if not options.change:
                #No changes, then give the commands to move the files
                #Get the PFN for both the current location and the target location
                pfnDict = phedexAPI.getPFN(siteLocation, inputFile)
                inputPfn = pfnDict[(siteLocation, inputFile)]
                pfnDict = phedexAPI.getPFN(targetLocation, inputFile)
                targetPfn = pfnDict[(targetLocation, inputFile)]

                #Print it to stdout
                print "lcg-cp -D srmv2 -b %s %s" % (inputPfn, targetPfn)

            else:
                #This is changes time, let's move the stuff
                targetSE = phedexAPI.getNodeSE(targetLocation)
                files[inputFile]["locations"][0] = targetSE
                print "Changing location of %s from %s to %s" % (inputFile, se, targetSE)

        #If specified, commit the changes
        if options.change:
            acdcCouch.commitOne(doc)

    return 0
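
A minimal sketch of the option parsing that swapLocations() expects; only the attribute names it reads (cachepath, acdcUrl, group, user, request, task, map, backup, change) come from the function body above, while the flag names and defaults are assumptions:

def buildSwapLocationsParser():
    """Hypothetical OptionParser matching the attributes read by swapLocations()."""
    from optparse import OptionParser
    parser = OptionParser()
    parser.add_option("--cachepath", dest="cachepath")    # PhEDEx cache directory
    parser.add_option("--acdc-url", dest="acdcUrl")       # CouchDB URL hosting wmagent_acdc
    parser.add_option("--group", dest="group")
    parser.add_option("--user", dest="user")
    parser.add_option("--request", dest="request")
    parser.add_option("--task", dest="task")
    parser.add_option("--map", dest="map")                # JSON file mapping {site: targetSite}
    parser.add_option("--backup", dest="backup", default=".")
    parser.add_option("--change", dest="change",
                      action="store_true", default=False) # actually modify the ACDC documents
    return parser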
Example #25
class DiscreteSummaryHistogramTest(unittest.TestCase):
    def setUp(self):
        """
        _setUp_

        Setup a couch database for testing
        of produced JSON
        """

        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setupCouch("histogram_dump_t")
        self.histogramDB = Database(dbname="histogram_dump_t")

    def tearDown(self):
        """
        _tearDown_

        Clean the couch
        """
        self.testInit.tearDownCouch()

    def testA_BasicTest(self):
        """
        _testA_BasicTest_

        Build a histogram from a set of discrete data. Check
        that the statistic properties in the histogram are accurate,
        and that this can become a document and be uploaded to couch
        """
        # Try and empty one
        histogram = DiscreteSummaryHistogram('SomeTitle', 'Categories')
        histogramJSON = histogram.toJSON()

        self.assertEqual(histogramJSON["title"], "SomeTitle")
        self.assertEqual(histogramJSON["xLabel"], "Categories")
        self.assertFalse(histogramJSON["continuous"])
        self.assertEqual(len(histogramJSON["data"]), 0)
        self.assertEqual(histogramJSON["average"], {})
        self.assertEqual(histogramJSON["stdDev"], {})

        histogram = DiscreteSummaryHistogram('SomeTitle', 'Categories')

        for _ in range(5):
            histogram.addPoint("CategoryA", "FeatureA")
            histogram.addPoint("CategoryB", "FeatureB")

        for _ in range(17):
            histogram.addPoint("CategoryA", "FeatureB")
            histogram.addPoint("CategoryC", "FeatureB")

        for _ in range(3):
            histogram.addPoint("CategoryC", "FeatureA")

        jsonHistogram = histogram.toJSON()

        # Average/stdDev per feature:
        # FeatureA: avg = 2.7 stdev = 2.05
        # FeatureB: avg = 13 stdev = 5.66
        self.assertAlmostEqual(jsonHistogram["average"]["FeatureA"],
                               2.7,
                               places=1)
        self.assertAlmostEqual(jsonHistogram["average"]["FeatureB"],
                               13,
                               places=1)
        self.assertAlmostEqual(jsonHistogram["stdDev"]["FeatureA"],
                               2.05,
                               places=1)
        self.assertAlmostEqual(jsonHistogram["stdDev"]["FeatureB"],
                               5.66,
                               places=1)
        self.assertEqual(jsonHistogram["data"]["CategoryA"]["FeatureA"], 5)
        self.assertEqual(jsonHistogram["data"]["CategoryA"]["FeatureB"], 17)
        self.assertEqual(jsonHistogram["data"]["CategoryB"]["FeatureA"], 0)
        self.assertEqual(jsonHistogram["data"]["CategoryB"]["FeatureB"], 5)
        self.assertEqual(jsonHistogram["data"]["CategoryC"]["FeatureA"], 3)
        self.assertEqual(jsonHistogram["data"]["CategoryC"]["FeatureB"], 17)

        # Test couch
        # Try to commit it to couch
        jsonHistogram["_id"] = jsonHistogram["title"]
        self.histogramDB.commitOne(jsonHistogram)

        storedJSON = self.histogramDB.document("SomeTitle")
        self.assertEqual(len(storedJSON["data"]), 3)

        return
def main():
    if len(sys.argv) < 2:
        print("Missing the connect Oracle TNS argument (user/password@server).")
        sys.exit(1)
    tns = sys.argv[1]
    
    print("Creating CouchDB database connection ...")
    couchdb = Database(couchdb_name, couch_url)
    print("Creating Oracle database connection ...")
    oradb = cx_Oracle.Connection(tns)
    
    num_couch_requests = get_couchdb_row_count(couchdb)
    print("Total CouchDB request documents in ReqMgr: %s" % num_couch_requests)
    num_oracle_requests = get_oracle_row_count(oradb, "reqmgr_request")                                                
    print("Total Oracle requests entries in ReqMgr: %s" % num_oracle_requests)
        
    if num_couch_requests != num_oracle_requests:
        print("Number of requests in Oracle, CouchDB don't agree, fix that first.")
        sys.exit(1)
    else:
        print("Database cross-check (Oracle request names vs CouchDB): DONE, THE SAME.")
        
    
    def get_couch_value(couch_req, mapping):
        try:
            c = couch_req[mapping["couch"]]
            couch_missing = False
        except KeyError:
            # comparison will not happen due to missing flag, the value
            # will be stored in couch
            c = "N/A"
            couch_missing = True
        return str(c), couch_missing
    
    
    def check_oracle_workflow_value(oracle_value, mapping, req_name):
        # check Oracle WORKFLOW value
        if mapping["oracle"] == "WORKFLOW":
            # https://cmsweb.cern.ch/couchdb/reqmgr_workload_cache/linacre_2011A_442p2_DataReprocessingMuOnia_111119_005717/spec
            from_wf_url_req_name = oracle_value.rsplit('/', 2)[-2]
            if req_name != from_wf_url_req_name:
                print("Workflow URL mismatch: %s" % oracle_value)
                sys.exit(1)


    counter = 0
    for oracle_req in get_oracle_data(oradb):
        req_name = oracle_req["REQUEST_NAME"]

        # FILTER
        # check only requests injected approx. after last deployment (a lot of
        # stuff should have already been fixed in ReqMgr)
        # _13041._*$ (ending of request name with date/time)
        #if not re.match(".*_1304[0-3][0-9]_.*$", req_name): # all April 2013
        #    continue
        
        counter += 1
        print("\n\n%s (%s)" % (req_name, counter))        
                
        couch_req = couchdb.document(req_name)
        couch_fields_to_correct = {}
        for mapping in MAPPING:
            if mapping["couch"] in COUCH_TO_IGNORE:
                continue
            o = str(oracle_req[mapping["oracle"]])
            c, couch_missing = get_couch_value(couch_req, mapping)
            check_oracle_workflow_value(o, mapping, req_name)
            
            # compare oracle and couch values
            # don't update value in couch if it exists and is non-empty
            if (couch_missing or o != c) and c in ('None', '0', '', "N/A"):
                print("%s %s != %s" % (mapping, o, c))
                # correct couch request by oracle value
                couch_fields_to_correct[mapping["couch"]] = o
        
        if couch_fields_to_correct:
            print("Couch corrected fields:")
            print(couch_fields_to_correct)
            if sys.argv[-1] == "-c":
                couchdb.updateDocument(req_name, "ReqMgr", "updaterequest",
                                       fields=couch_fields_to_correct, useBody=True)
                print("Couch updated")
        else:
            print("OK")
        
        # fields that should be removed from couch
        """