Esempio n. 1
0
class WorkQueueBackend(object):
    """
    Represents persistent storage for WorkQueue.

    Holds handles to the queue database and to its inbox database on one
    couch server, plus the (optional) parent queue used for replication.
    """
    def __init__(self,
                 db_url,
                 db_name='workqueue',
                 inbox_name=None,
                 parentQueue=None,
                 queueUrl=None,
                 logger=None):
        """
        Connect to the queue and inbox databases.

        :param db_url: couch server url, stored unchanged in ``hostWithAuth``
        :param db_name: name of the queue database
        :param inbox_name: name of the inbox database; "<db_name>_inbox"
                           when not given
        :param parentQueue: url of the parent queue, or a false value when
                            there is no parent
        :param queueUrl: url identifying this queue; defaults to
                         "<db_url>/<db_name>"
        :param logger: logger to use; the ``logging`` module when not given
        """
        if logger:
            self.logger = logger
        else:
            import logging
            self.logger = logging

        # identity test: None is the only value meaning "derive the name"
        if inbox_name is None:
            inbox_name = "%s_inbox" % db_name

        self.server = CouchServer(db_url)
        self.parentCouchUrlWithAuth = parentQueue
        if parentQueue:
            self.parentCouchUrl = sanitizeURL(parentQueue)['url']
        else:
            self.parentCouchUrl = None
        self.db = self.server.connectDatabase(db_name,
                                              create=False,
                                              size=10000)
        self.hostWithAuth = db_url
        self.inbox = self.server.connectDatabase(inbox_name,
                                                 create=False,
                                                 size=10000)
        self.queueUrl = sanitizeURL(queueUrl
                                    or (db_url + '/' + db_name))['url']

    def forceQueueSync(self):
        """Force a blocking replication
            - for use mainly in tests"""
        self.pullFromParent(continuous=False)
        # NOTE(review): sendToParent is not defined in the visible part of
        # this class - confirm it is provided elsewhere.
        self.sendToParent(continuous=False)

    def pullFromParent(self, continuous=True, cancel=False):
        """Replicate from the parent couch into the local inbox.

        Nothing is done unless both a parent url and a queue url are set.
        Any failure is logged as a warning and not propagated.

        :param continuous: passed through to the server's replicate call
        :param cancel: passed through to the server's replicate call
        """
        try:
            if self.parentCouchUrl and self.queueUrl:
                self.server.replicate(source=self.parentCouchUrl,
                                      destination="%s/%s" %
                                      (self.hostWithAuth, self.inbox.name),
                                      filter='WorkQueue/queueFilter',
                                      query_params={
                                          'childUrl': self.queueUrl,
                                          'parentUrl': self.parentCouchUrl
                                      },
                                      continuous=continuous,
                                      cancel=cancel,
                                      useReplicator=True)
        except Exception as ex:
            # best effort: replication problems must not break the caller
            self.logger.warning('Replication from %s failed: %s' %
                                (self.parentCouchUrl, str(ex)))
Esempio n. 2
0
class WMStatsReader(object):
    """Reader for a WMStats couch database."""

    def __init__(self, couchURL, dbName=None):
        """
        Connect to the couch database.

        :param couchURL: couch url; when dbName is not given it is split into
                         server url and database name by splitCouchServiceURL
        :param dbName: database name; when given, couchURL is used as the
                       server url
        """
        couchURL = sanitizeURL(couchURL)['url']
        # set the connection for local couchDB call
        if dbName:
            self.couchURL = couchURL
            self.dbName = dbName
        else:
            self.couchURL, self.dbName = splitCouchServiceURL(couchURL)
        self.couchServer = CouchServer(self.couchURL)
        # reuse the server object instead of building a second, identical one
        self.couchDB = self.couchServer.connectDatabase(self.dbName, False)

    def workflowsByStatus(self, statusList):
        """Return the document ids of the 'requestByStatus' view rows for
        the given status keys.

        :param statusList: keys passed to the view
        :return: list with the 'id' of every returned row
        """
        options = {"stale": "update_after"}
        result = self.couchDB.loadView("WMStats", "requestByStatus", options,
                                       statusList)
        return [item["id"] for item in result["rows"]]

    def replicate(self, target):
        """Start a continuous replication of this database to target."""
        self.couchServer.replicate(self.dbName, target,
                                   continuous=True)
Esempio n. 3
0
class WorkQueueBackend(object):
    """
    Represents persistent storage for WorkQueue.

    Keeps the connections to the queue database and its inbox, and the url
    of the parent queue (if any) that elements are replicated from.
    """
    def __init__(self, db_url, db_name='workqueue',
                 inbox_name=None, parentQueue=None,
                 queueUrl=None, logger=None):
        """
        Connect to the queue and inbox databases.

        :param db_url: couch server url, stored unchanged in ``hostWithAuth``
        :param db_name: name of the queue database
        :param inbox_name: inbox database name; "<db_name>_inbox" if None
        :param parentQueue: parent queue url, or a false value for no parent
        :param queueUrl: url of this queue; "<db_url>/<db_name>" if not given
        :param logger: logger object; the ``logging`` module if not given
        """
        if logger:
            self.logger = logger
        else:
            import logging
            self.logger = logging

        # compare with None by identity, not equality
        if inbox_name is None:
            inbox_name = "%s_inbox" % db_name

        self.server = CouchServer(db_url)
        self.parentCouchUrlWithAuth = parentQueue
        if parentQueue:
            self.parentCouchUrl = sanitizeURL(parentQueue)['url']
        else:
            self.parentCouchUrl = None
        self.db = self.server.connectDatabase(db_name, create=False, size=10000)
        self.hostWithAuth = db_url
        self.inbox = self.server.connectDatabase(inbox_name, create=False, size=10000)
        self.queueUrl = sanitizeURL(queueUrl or (db_url + '/' + db_name))['url']

    def forceQueueSync(self):
        """Force a blocking replication
            - for use mainly in tests"""
        self.pullFromParent(continuous=False)
        # NOTE(review): sendToParent is not defined in the visible part of
        # this class - confirm it is provided elsewhere.
        self.sendToParent(continuous=False)

    def pullFromParent(self, continuous=True, cancel=False):
        """Replicate from the parent couch into the local inbox.

        Does nothing unless both the parent url and the queue url are set.
        Errors are logged as warnings and swallowed.

        :param continuous: forwarded to the server's replicate call
        :param cancel: forwarded to the server's replicate call
        """
        try:
            if self.parentCouchUrl and self.queueUrl:
                self.server.replicate(source=self.parentCouchUrl,
                                      destination="%s/%s" % (self.hostWithAuth, self.inbox.name),
                                      filter='WorkQueue/queueFilter',
                                      query_params={'childUrl': self.queueUrl,
                                                    'parentUrl': self.parentCouchUrl},
                                      continuous=continuous,
                                      cancel=cancel,
                                      useReplicator=True)
        except Exception as ex:
            # best effort: a failed replication only produces a warning
            self.logger.warning('Replication from %s failed: %s' % (self.parentCouchUrl, str(ex)))
Esempio n. 4
0
class WorkQueueBackend(object):
    """
    Represents persistent storage for WorkQueue
    """
    def __init__(self,
                 db_url,
                 db_name='workqueue',
                 inbox_name=None,
                 parentQueue=None,
                 queueUrl=None,
                 logger=None):
        """
        Open the queue database and its inbox on the couch server at db_url.

        :param db_url: couch server url, stored unchanged in ``hostWithAuth``
        :param db_name: name of the queue database
        :param inbox_name: inbox database name; "<db_name>_inbox" if None
        :param parentQueue: parent queue url, or a false value for no parent
        :param queueUrl: url of this queue; "<db_url>/<db_name>" if not given
        :param logger: logger object; the ``logging`` module if not given
        """
        if not logger:
            # fall back to the module level functions of logging
            import logging
            logger = logging
        self.logger = logger

        inboxDbName = inbox_name
        if inboxDbName is None:
            inboxDbName = "%s_inbox" % db_name

        self.server = CouchServer(db_url)
        self.hostWithAuth = db_url
        self.parentCouchUrlWithAuth = parentQueue
        # url as returned by sanitizeURL, kept next to the original one
        self.parentCouchUrl = sanitizeURL(parentQueue)['url'] if parentQueue else None

        connect = self.server.connectDatabase
        self.db = connect(db_name, create=False, size=10000)
        self.inbox = connect(inboxDbName, create=False, size=10000)

        ownUrl = queueUrl or (db_url + '/' + db_name)
        self.queueUrl = sanitizeURL(ownUrl)['url']
        # key under which element parameters are nested in update requests
        self.eleKey = 'WMCore.WorkQueue.DataStructs.WorkQueueElement.WorkQueueElement'

    def forceQueueSync(self):
        """Force a blocking replication - used only in tests"""
        self.pullFromParent(continuous=False)
        self.sendToParent(continuous=False)

    def pullFromParent(self, continuous=True, cancel=False):
        """Replicate from parent couch - blocking: used only int test"""
        try:
            if self.parentCouchUrl and self.queueUrl:
                self.server.replicate(source=self.parentCouchUrl,
                                      destination="%s/%s" %
                                      (self.hostWithAuth, self.inbox.name),
                                      filter='WorkQueue/queueFilter',
                                      query_params={
                                          'childUrl': self.queueUrl,
                                          'parentUrl': self.parentCouchUrl
                                      },
                                      continuous=continuous,
                                      cancel=cancel)
        except Exception as ex:
            self.logger.warning('Replication from %s failed: %s' %
                                (self.parentCouchUrl, str(ex)))

    def sendToParent(self, continuous=True, cancel=False):
        """Replicate to parent couch - blocking: used only int test"""
        try:
            if self.parentCouchUrl and self.queueUrl:
                self.server.replicate(source="%s" % self.inbox.name,
                                      destination=self.parentCouchUrlWithAuth,
                                      filter='WorkQueue/queueFilter',
                                      query_params={
                                          'childUrl': self.queueUrl,
                                          'parentUrl': self.parentCouchUrl
                                      },
                                      continuous=continuous,
                                      cancel=cancel)
        except Exception as ex:
            self.logger.warning('Replication to %s failed: %s' %
                                (self.parentCouchUrl, str(ex)))

    def getElementsForSplitting(self):
        """Returns the elements from the inbox that need to be split,
        if WorkflowName specified only return elements to split for that workflow"""
        elements = self.getInboxElements(status='Negotiating')
        specs = {}  # cache as may have multiple elements for same spec
        for ele in elements:
            if ele['RequestName'] not in specs:
                wmspec = WMWorkloadHelper()
                wmspec.load(self.parentCouchUrlWithAuth +
                            "/%s/spec" % ele['RequestName'])
                specs[ele['RequestName']] = wmspec
            ele['WMSpec'] = specs[ele['RequestName']]
        del specs
        return elements

    def insertWMSpec(self, wmspec):
        """
        Insert WMSpec to backend
        """
        # Can't save spec to inbox, it needs to be visible to child queues
        # Can't save empty dict so add dummy variable
        dummy_values = {'name': wmspec.name()}
        # change specUrl in spec before saving (otherwise it points to previous url)
        wmspec.setSpecUrl(self.db['host'] + "/%s/%s/spec" %
                          (self.db.name, wmspec.name()))
        return wmspec.saveCouch(self.hostWithAuth, self.db.name, dummy_values)

    def getWMSpec(self, name):
        """Load and return the spec stored in the queue database under name."""
        helper = WMWorkloadHelper()
        specUrl = self.db['host'] + "/%s/%s/spec" % (self.db.name, name)
        helper.load(specUrl)
        return helper

    def insertElements(self, units, parent=None):
        """
        Insert element to database

        @param parent is the parent WorkQueueObject these element's belong to.
                                            i.e. a workflow which has been split
        """
        if not units:
            return
        # store spec file separately - assume all elements share same spec
        self.insertWMSpec(units[0]['WMSpec'])
        newUnitsInserted = []
        for unit in units:

            # cast to couch
            if not isinstance(unit, CouchWorkQueueElement):
                unit = CouchWorkQueueElement(self.db, elementParams=dict(unit))

            if parent:
                unit['ParentQueueId'] = parent.id
                unit['TeamName'] = parent['TeamName']
                unit['WMBSUrl'] = parent['WMBSUrl']

            if unit._couch.documentExists(unit.id):
                self.logger.info(
                    'Element "%s" already exists, skip insertion.' % unit.id)
                continue
            else:
                newUnitsInserted.append(unit)
            unit.save()
            unit._couch.commit(all_or_nothing=True)

        return newUnitsInserted

    def createWork(self, spec, **kwargs):
        """Build the Inbox element for this spec and return it.

        The element takes the spec name as its id.
        This does not persist it to the database.
        """
        specParams = {
            'WMSpec': spec,
            'RequestName': spec.name(),
            'StartPolicy': spec.startPolicyParameters(),
            'EndPolicy': spec.endPolicyParameters(),
            'OpenForNewData': False
        }
        # values derived from the spec win over caller supplied ones
        kwargs.update(specParams)
        element = CouchWorkQueueElement(self.inbox, elementParams=kwargs)
        element.id = spec.name()
        return element

    def getElements(self,
                    status=None,
                    elementIDs=None,
                    returnIdOnly=False,
                    db=None,
                    loadSpec=False,
                    WorkflowName=None,
                    **elementFilters):
        """Return elements that match requirements

        status, elementIDs & filters are 'AND'ed together to filter elements.
        returnIdOnly causes the element not to be loaded and only the id returned
        db is used to specify which database to return from
        loadSpec causes the workflow for each spec to be loaded.
        WorkflowName may be used in the place of RequestName
        """
        key = []
        if not db:
            db = self.db
        if elementFilters.get('RequestName') and not WorkflowName:
            WorkflowName = elementFilters.pop('RequestName')

        if elementIDs:
            if elementFilters or status or returnIdOnly:
                raise ValueError(
                    "Can't specify extra filters (or return id's) when using element id's with getElements()"
                )
            elements = [
                CouchWorkQueueElement(db, i).load() for i in elementIDs
            ]
        else:
            options = {
                'include_docs': True,
                'filter': elementFilters,
                'idOnly': returnIdOnly,
                'reduce': False
            }
            # filter on workflow or status if possible
            filterName = 'elementsByWorkflow'
            if WorkflowName:
                key.append(WorkflowName)
            elif status:
                filterName = 'elementsByStatus'
                key.append(status)
            elif elementFilters.get('SubscriptionId'):
                key.append(elementFilters['SubscriptionId'])
                filterName = 'elementsBySubscription'
            # add given params to filters
            if status:
                options['filter']['Status'] = status
            if WorkflowName:
                options['filter']['RequestName'] = WorkflowName

            view = db.loadList('WorkQueue', 'filter', filterName, options, key)
            view = json.loads(view)
            if returnIdOnly:
                return view
            elements = [
                CouchWorkQueueElement.fromDocument(db, row) for row in view
            ]

        if loadSpec:
            specs = {}  # cache as may have multiple elements for same spec
            for ele in elements:
                if ele['RequestName'] not in specs:
                    wmspec = self.getWMSpec(ele['RequestName'])
                    specs[ele['RequestName']] = wmspec
                ele['WMSpec'] = specs[ele['RequestName']]
            del specs
        return elements

    def getInboxElements(self, *args, **kwargs):
        """
        Return elements from Inbox, supports same semantics as getElements()
        """
        return self.getElements(*args, db=self.inbox, **kwargs)

    def getElementsForWorkflow(self, workflow):
        """Get elements for a workflow"""
        elements = self.db.loadView('WorkQueue', 'elementsByWorkflow', {
            'key': workflow,
            'include_docs': True,
            'reduce': False
        })
        return [
            CouchWorkQueueElement.fromDocument(self.db, x['doc'])
            for x in elements.get('rows', [])
        ]

    def getElementsForParent(self, parent):
        """Get elements with the given parent"""
        elements = self.db.loadView('WorkQueue', 'elementsByParent', {
            'key': parent.id,
            'include_docs': True
        })
        return [
            CouchWorkQueueElement.fromDocument(self.db, x['doc'])
            for x in elements.get('rows', [])
        ]

    def saveElements(self, *elements):
        """Persist elements

        Returns elements successfully saved, user must verify to catch errors
        """
        result = []
        if not elements:
            return result
        for element in elements:
            element.save()
        answer = elements[0]._couch.commit()
        result, failures = formatReply(answer, *elements)
        msg = 'Couch error saving element: "%s", error "%s", reason "%s"'
        for failed in failures:
            self.logger.error(
                msg % (failed['id'], failed['error'], failed['reason']))
        return result

    def _raiseConflictErrorAndLog(self,
                                  conflictIDs,
                                  updatedParams,
                                  dbName="workqueue"):
        """Log the ids that could not be updated and raise WorkQueueError.

        The same message, naming dbName, the ids and the parameters, is used
        for the log entry and for the exception.
        """
        details = (dbName, conflictIDs, updatedParams)
        errorMsg = "Need to update this element manually from %s\n ids:%s\n, parameters:%s\n" % details
        self.logger.error(errorMsg)
        raise WorkQueueError(errorMsg)

    def updateElements(self, *elementIds, **updatedParams):
        """Update given element's (identified by id) with new parameters"""
        if not elementIds:
            return
        eleParams = {}
        eleParams[self.eleKey] = updatedParams
        conflictIDs = self.db.updateBulkDocumentsWithConflictHandle(
            elementIds, eleParams)
        if conflictIDs:
            self._raiseConflictErrorAndLog(conflictIDs, updatedParams)
        return

    def updateInboxElements(self, *elementIds, **updatedParams):
        """Update given inbox element's (identified by id) with new parameters"""
        if not elementIds:
            return
        eleParams = {}
        eleParams[self.eleKey] = updatedParams
        conflictIDs = self.inbox.updateBulkDocumentsWithConflictHandle(
            elementIds, eleParams)
        if conflictIDs:
            self._raiseConflictErrorAndLog(conflictIDs, updatedParams,
                                           "workqueue_inbox")
        return

    def deleteElements(self, *elements):
        """Delete elements"""
        if not elements:
            return
        specs = {}
        for i in elements:
            i.delete()
            specs[i['RequestName']] = None
        answer = elements[0]._couch.commit()
        _, failures = formatReply(answer, *elements)
        msg = 'Couch error deleting element: "%s", error "%s", reason "%s"'
        for failed in failures:
            # only count delete as failed if document still exists
            if elements[0]._couch.documentExists(failed['id']):
                self.logger.error(
                    msg % (failed['id'], failed['error'], failed['reason']))
        # delete specs if no longer used
        for wf in specs:
            try:
                if not self.db.loadView('WorkQueue', 'elementsByWorkflow', {
                        'key': wf,
                        'limit': 1,
                        'reduce': False
                })['rows']:
                    self.db.delete_doc(wf)
            except CouchNotFoundError:
                pass

    def availableWork(self,
                      thresholds,
                      siteJobCounts,
                      team=None,
                      wfs=None,
                      excludeWorkflows=None,
                      numElems=9999999):
        """
        Get work which is available to be run

        Assume thresholds is a dictionary; keys are the site name, values are
        the maximum number of running jobs at that site.

        Assumes site_job_counts is a dictionary-of-dictionaries; keys are the site
        name and task priorities.  The value is the number of jobs running at that
        priority.

        It will pull work until it reaches the number of elements configured (numElems).
        Since it's also used for calculating free resources, default it to "infinity"

        Note: this method will be called with no limit of work elements when it's simply
        calculating the resources available (based on what is in LQ), before it gets work
        from GQ
        """
        self.logger.info("Getting up to %d available work from %s", numElems,
                         self.queueUrl)

        excludeWorkflows = excludeWorkflows or []
        elements = []
        sortedElements = []

        # We used to pre-filter sites, looking to see if there are idle job slots
        # We don't do this anymore, as we may over-allocate
        # jobs to sites if the new jobs have a higher priority.

        # If there are no sites, punt early.
        if not thresholds:
            self.logger.error("No thresholds is set: Please check")
            return elements, thresholds, siteJobCounts

        options = {}
        options['include_docs'] = True
        options['descending'] = True
        options['resources'] = thresholds
        if team:
            options['team'] = team
            self.logger.info("setting team to %s" % team)
        if wfs:
            result = []
            for i in xrange(0, len(wfs), 20):
                options['wfs'] = wfs[i:i + 20]
                data = self.db.loadList('WorkQueue', 'workRestrictions',
                                        'availableByPriority', options)
                result.extend(json.loads(data))
        else:
            result = self.db.loadList('WorkQueue', 'workRestrictions',
                                      'availableByPriority', options)
            result = json.loads(result)
            if len(result) == 0:
                self.logger.info(
                    """No available work in WQ or didn't pass workqueue restriction
                                    - check Pileup, site white list, etc""")
            self.logger.debug("Available Work:\n %s \n for resources\n %s" %
                              (result, thresholds))
        # Iterate through the results; apply whitelist / blacklist / data
        # locality restrictions.  Only assign jobs if they are high enough
        # priority.
        for i in result:
            element = CouchWorkQueueElement.fromDocument(self.db, i)
            # filter out exclude list from abvaling
            if element['RequestName'] not in excludeWorkflows:
                sortedElements.append(element)

        # sort elements to get them in priority first and timestamp order
        sortedElements.sort(key=lambda element: element['CreationTime'])
        sortedElements.sort(key=lambda x: x['Priority'], reverse=True)

        for element in sortedElements:
            if numElems <= 0:
                self.logger.info(
                    "Reached the maximum number of elements to be pulled: %d",
                    len(elements))
                break

            if not possibleSites(element):
                self.logger.info("No possible sites for %s with doc id %s",
                                 element['RequestName'], element.id)
                continue

            prio = element['Priority']
            possibleSite = None
            sites = thresholds.keys()
            random.shuffle(sites)
            for site in sites:
                if element.passesSiteRestriction(site):
                    # Count the number of jobs currently running of greater priority
                    curJobCount = sum([
                        x[1] if x[0] >= prio else 0
                        for x in siteJobCounts.get(site, {}).items()
                    ])
                    self.logger.debug(
                        "Job Count: %s, site: %s thresholds: %s" %
                        (curJobCount, site, thresholds[site]))
                    if curJobCount < thresholds[site]:
                        possibleSite = site
                        break

            if possibleSite:
                numElems -= 1
                self.logger.debug("Possible site exists %s" %
                                  str(possibleSite))
                elements.append(element)
                if possibleSite not in siteJobCounts:
                    siteJobCounts[possibleSite] = {}
                siteJobCounts[possibleSite][prio] = siteJobCounts[possibleSite].setdefault(prio, 0) + \
                                                    element['Jobs'] * element.get('blowupFactor', 1.0)
            else:
                self.logger.debug(
                    "No available resources for %s with doc id %s",
                    element['RequestName'], element.id)

        return elements, thresholds, siteJobCounts

    def getActiveData(self):
        """Get data items we have work in the queue for"""
        data = self.db.loadView('WorkQueue', 'activeData', {
            'reduce': True,
            'group': True
        })
        return [{
            'dbs_url': x['key'][0],
            'name': x['key'][1]
        } for x in data.get('rows', [])]

    def getActiveParentData(self):
        """Get data items we have work in the queue for with parent"""
        data = self.db.loadView('WorkQueue', 'activeParentData', {
            'reduce': True,
            'group': True
        })
        return [{
            'dbs_url': x['key'][0],
            'name': x['key'][1]
        } for x in data.get('rows', [])]

    def getActivePileupData(self):
        """Get data items we have work in the queue for with pileup"""
        data = self.db.loadView('WorkQueue', 'activePileupData', {
            'reduce': True,
            'group': True
        })
        return [{
            'dbs_url': x['key'][0],
            'name': x['key'][1]
        } for x in data.get('rows', [])]

    def getElementsForData(self, data):
        """Get active elements for this dbs & data combo"""
        elements = self.db.loadView('WorkQueue', 'elementsByData', {
            'key': data,
            'include_docs': True
        })
        return [
            CouchWorkQueueElement.fromDocument(self.db, x['doc'])
            for x in elements.get('rows', [])
        ]

    def getElementsForParentData(self, data):
        """Get active elements for this data """
        elements = self.db.loadView('WorkQueue', 'elementsByParentData', {
            'key': data,
            'include_docs': True
        })
        return [
            CouchWorkQueueElement.fromDocument(self.db, x['doc'])
            for x in elements.get('rows', [])
        ]

    def getElementsForPileupData(self, data):
        """Get active elements for this data """
        elements = self.db.loadView('WorkQueue', 'elementsByPileupData', {
            'key': data,
            'include_docs': True
        })
        return [
            CouchWorkQueueElement.fromDocument(self.db, x['doc'])
            for x in elements.get('rows', [])
        ]

    def isAvailable(self):
        """Is the server available, i.e. up and not compacting"""
        try:
            compacting = self.db.info()['compact_running']
            if compacting:
                self.logger.info("CouchDB compacting - try again later.")
                return False
        except Exception as ex:
            self.logger.error("CouchDB unavailable: %s" % str(ex))
            return False
        return True

    def getWorkflows(self, includeInbox=False, includeSpecs=False):
        """Returns workflows known to workqueue"""
        result = set([
            x['key'] for x in self.db.loadView(
                'WorkQueue', 'elementsByWorkflow', {'group': True})['rows']
        ])
        if includeInbox:
            result = result | set([
                x['key'] for x in self.inbox.loadView(
                    'WorkQueue', 'elementsByWorkflow', {'group': True})['rows']
            ])
        if includeSpecs:
            result = result | set([
                x['key'] for x in self.db.loadView('WorkQueue',
                                                   'specsByWorkflow')['rows']
            ])
        return list(result)

    def queueLength(self):
        """Return number of available elements"""
        return self.db.loadView('WorkQueue', 'availableByPriority',
                                {'limit': 0})['total_rows']

    def fixConflicts(self):
        """Fix elements in conflict

        Each local queue runs this to resolve its conflicts with global,
        resolution propagates up to global.

        Conflicting elements are merged into one element with others deleted.

        This will fail if elements are modified during the resolution -
        if this happens rerun.
        """
        for db in [self.inbox, self.db]:
            for row in db.loadView('WorkQueue', 'conflicts')['rows']:
                element_id = row['id']
                try:
                    conflicting_elements = [CouchWorkQueueElement.fromDocument(db, db.document(element_id, rev)) \
                                            for rev in row['value']]
                    fixed_elements = fixElementConflicts(*conflicting_elements)
                    if self.saveElements(fixed_elements[0]):
                        self.saveElements(
                            *fixed_elements[1:]
                        )  # delete others (if merged value update accepted)
                except Exception as ex:
                    self.logger.error("Error resolving conflict for %s: %s" %
                                      (element_id, str(ex)))

    def recordTaskActivity(self, taskname, comment=''):
        """Record a task for monitoring"""
        try:
            record = self.db.document('task_activity')
        except CouchNotFoundError:
            record = Document('task_activity')
        record.setdefault('tasks', {})
        record['tasks'].setdefault(taskname, {})
        record['tasks'][taskname]['timestamp'] = time.time()
        record['tasks'][taskname]['comment'] = comment
        try:
            self.db.commitOne(record)
        except Exception as ex:
            self.logger.error("Unable to update task %s freshness: %s" %
                              (taskname, str(ex)))

    def getWMBSInjectStatus(self, request=None):
        """
        This service only provided by global queue except on draining agent
        """
        options = {'group': True, 'reduce': True}
        if request:
            options.update(key=request)
        data = self.db.loadView('WorkQueue', 'wmbsInjectStatusByRequest',
                                options)
        if request:
            if data['rows']:
                injectionStatus = data['rows'][0]['value']
                inboxElement = self.getInboxElements(WorkflowName=request)
                requestOpen = inboxElement[0].get(
                    'OpenForNewData', False) if inboxElement else False
                return injectionStatus and not requestOpen
            else:
                raise WorkQueueNoMatchingElements("%s not found" % request)
        else:
            injectionStatus = dict(
                (x['key'], x['value']) for x in data.get('rows', []))
            finalInjectionStatus = []
            for request in injectionStatus.keys():
                inboxElement = self.getInboxElements(WorkflowName=request)
                requestOpen = inboxElement[0].get(
                    'OpenForNewData', False) if inboxElement else False
                finalInjectionStatus.append(
                    {request: injectionStatus[request] and not requestOpen})

            return finalInjectionStatus

    def getWorkflowNames(self, inboxFlag=False):
        """Get workflow names from workqueue db"""
        if inboxFlag:
            db = self.inbox
        else:
            db = self.db
        data = db.loadView('WorkQueue', 'elementsByWorkflow', {
            'stale': "update_after",
            'reduce': True,
            'group': True
        })
        return [x['key'] for x in data.get('rows', [])]

    def deleteWQElementsByWorkflow(self, workflowNames):
        """
        delete workqueue elements belongs to given workflow names
        it doen't check the status of workflow so need to be careful to use this.
        Pass only workflows which has the end status
        """
        deleted = 0
        dbs = [self.db, self.inbox]
        if not isinstance(workflowNames, list):
            workflowNames = [workflowNames]

        if len(workflowNames) == 0:
            return deleted
        options = {}
        options["stale"] = "update_after"
        options["reduce"] = False

        for couchdb in dbs:
            result = couchdb.loadView("WorkQueue", "elementsByWorkflow",
                                      options, workflowNames)
            ids = []
            for entry in result["rows"]:
                ids.append(entry["id"])
            if ids:
                couchdb.bulkDeleteByIDs(ids)
                deleted += len(ids)
        # delete the workflow with spec from workqueue db
        for wf in workflowNames:
            self.db.delete_doc(wf)
        return deleted
class WorkQueueBackend(object):
    """
    Represents persistent storage for WorkQueue
    """
    def __init__(self, db_url, db_name = 'workqueue',
                 inbox_name = None, parentQueue = None,
                 queueUrl = None, logger = None):
        """
        Connect to the couch databases backing a WorkQueue.

        db_url      -- couch server url; kept unmodified in hostWithAuth
        db_name     -- name of the main element database
        inbox_name  -- name of the inbox database, "<db_name>_inbox" if None
        parentQueue -- url of the parent queue (optional); kept unmodified in
                       parentCouchUrlWithAuth, sanitized in parentCouchUrl
        queueUrl    -- url this queue is known by, defaults to
                       db_url + '/' + db_name; stored sanitized
        logger      -- logging object; the logging module itself is used
                       when none is supplied

        Neither database is created here (create = False).
        """
        if logger:
            self.logger = logger
        else:
            import logging
            self.logger = logging

        # identity check: None is a singleton, '==' could be overridden
        if inbox_name is None:
            inbox_name = "%s_inbox" % db_name

        self.server = CouchServer(db_url)
        self.parentCouchUrlWithAuth = parentQueue
        if parentQueue:
            self.parentCouchUrl = sanitizeURL(parentQueue)['url']
        else:
            self.parentCouchUrl = None
        self.db = self.server.connectDatabase(db_name, create = False, size = 10000)
        self.hostWithAuth = db_url
        self.inbox = self.server.connectDatabase(inbox_name, create = False, size = 10000)
        self.queueUrl = sanitizeURL(queueUrl or (db_url + '/' + db_name))['url']

    def forceQueueSync(self):
        """Run one non-continuous pull from, then push to, the parent queue.

        Intended for use in tests only.
        """
        for replicate in (self.pullFromParent, self.sendToParent):
            replicate(continuous = False)

    def pullFromParent(self, continuous = True, cancel = False):
        """
        Replicate from the parent couch into the local inbox database.

        Nothing is done unless both a parent url and a queue url are set.
        The replication is filtered by 'WorkQueue/queueFilter' with the child
        and parent urls as query parameters; continuous and cancel are passed
        straight to the server's replicate call.

        Any exception is logged as a warning and not propagated.
        """
        try:
            if not (self.parentCouchUrl and self.queueUrl):
                return
            inboxUrl = "%s/%s" % (self.hostWithAuth, self.inbox.name)
            filterParams = {'childUrl' : self.queueUrl,
                            'parentUrl' : self.parentCouchUrl}
            self.server.replicate(source = self.parentCouchUrl,
                                  destination = inboxUrl,
                                  filter = 'WorkQueue/queueFilter',
                                  query_params = filterParams,
                                  continuous = continuous,
                                  cancel = cancel)
        except Exception as error:
            self.logger.warning('Replication from %s failed: %s' % (self.parentCouchUrl, str(error)))

    def sendToParent(self, continuous = True, cancel = False):
        """
        Replicate the local inbox database to the parent couch.

        Nothing is done unless both a parent url and a queue url are set.
        The replication is filtered by 'WorkQueue/queueFilter' with the child
        and parent urls as query parameters; continuous and cancel are passed
        straight to the server's replicate call.

        Any exception is logged as a warning and not propagated.
        """
        try:
            if not (self.parentCouchUrl and self.queueUrl):
                return
            localInbox = "%s" % self.inbox.name
            filterParams = {'childUrl' : self.queueUrl,
                            'parentUrl' : self.parentCouchUrl}
            self.server.replicate(source = localInbox,
                                  destination = self.parentCouchUrlWithAuth,
                                  filter = 'WorkQueue/queueFilter',
                                  query_params = filterParams,
                                  continuous = continuous,
                                  cancel = cancel)
        except Exception as error:
            self.logger.warning('Replication to %s failed: %s' % (self.parentCouchUrl, str(error)))


    def getElementsForSplitting(self):
        """
        Return the inbox elements in 'Negotiating' status, i.e. those that
        still need to be split.

        Each returned element gets its 'WMSpec' entry set to a workload
        helper loaded from "<parent url with auth>/<RequestName>/spec".
        A spec is loaded only once per request name and shared between the
        elements of that request.
        """
        negotiating = self.getInboxElements(status = 'Negotiating')
        loadedSpecs = {}  # request name -> already loaded spec
        for element in negotiating:
            requestName = element['RequestName']
            if requestName in loadedSpecs:
                spec = loadedSpecs[requestName]
            else:
                spec = WMWorkloadHelper()
                spec.load(self.parentCouchUrlWithAuth + "/%s/spec" % requestName)
                loadedSpecs[requestName] = spec
            element['WMSpec'] = spec
        return negotiating


    def insertWMSpec(self, wmspec):
        """
        Store the given spec in the main workqueue database.

        The spec is saved to the main database rather than the inbox because
        it needs to be visible to child queues.  Before saving, the spec url
        inside the spec is repointed at this database (otherwise it would
        still reference the previous location).

        Returns whatever the spec's saveCouch() call returns.
        """
        # couch can't save an empty dict, so store the name as filler content
        placeholder = {'name' : wmspec.name()}
        dbHost = self.db['host']
        specPath = "/%s/%s/spec" % (self.db.name, wmspec.name())
        wmspec.setSpecUrl(dbHost + specPath)
        return wmspec.saveCouch(self.hostWithAuth, self.db.name, placeholder)


    def getWMSpec(self, name):
        """
        Load and return the spec stored under the given workflow name.

        The spec is read from "<db host>/<db name>/<name>/spec" of the main
        workqueue database.
        """
        helper = WMWorkloadHelper()
        location = self.db['host'] + "/%s/%s/spec" % (self.db.name, name)
        helper.load(location)
        return helper

    def insertElements(self, units, parent = None):
        """
        Insert elements into the main database.

        The spec of the first unit is stored once up front - all units are
        assumed to share the same spec.  Units that are not couch elements
        yet are converted.  A unit whose id already exists in the database is
        skipped with an info message.  All queued saves are committed in a
        single all_or_nothing commit at the end.

        @param units  elements to insert; nothing happens when empty
        @param parent the parent WorkQueueObject these elements belong to,
                      i.e. a workflow which has been split.  When given, its
                      id, TeamName and WMBSUrl are copied onto every unit.
        """
        if not units:
            return
        # store spec file separately - assume all elements share same spec
        self.insertWMSpec(units[0]['WMSpec'])

        element = None
        for candidate in units:
            # cast to couch
            if isinstance(candidate, CouchWorkQueueElement):
                element = candidate
            else:
                element = CouchWorkQueueElement(self.db, elementParams = dict(candidate))

            if parent:
                element['ParentQueueId'] = parent.id
                element['TeamName'] = parent['TeamName']
                element['WMBSUrl'] = parent['WMBSUrl']

            if not element._couch.documentExists(element.id):
                element.save()
            else:
                self.logger.info('Element "%s" already exists, skip insertion.' % element.id)

        # the commit goes through the couch handle of the last unit processed
        element._couch.commit(all_or_nothing = True)
        return

    def createWork(self, spec, **kwargs):
        """
        Build the inbox element for the given spec.

        The extra keyword arguments become element parameters; WMSpec,
        RequestName, StartPolicy, EndPolicy and OpenForNewData are always
        taken from the spec / set here and override same-named keywords.
        The element id is the spec name.

        Nothing is written to the database.
        """
        kwargs['WMSpec'] = spec
        kwargs['RequestName'] = spec.name()
        kwargs['StartPolicy'] = spec.startPolicyParameters()
        kwargs['EndPolicy'] = spec.endPolicyParameters()
        kwargs['OpenForNewData'] = True
        inboxElement = CouchWorkQueueElement(self.inbox, elementParams = kwargs)
        inboxElement.id = spec.name()
        return inboxElement

    def getElements(self, status = None, elementIDs = None, returnIdOnly = False,
                    db = None, loadSpec = False, WorkflowName = None, **elementFilters):
        """
        Return the elements matching the given requirements.

        status, WorkflowName and the extra keyword filters are 'AND'ed
        together.  A 'RequestName' keyword is treated as WorkflowName when
        the latter is not given.

        elementIDs   -- load exactly these ids; cannot be combined with
                        status, extra filters or returnIdOnly (ValueError)
        returnIdOnly -- return the decoded list output as is instead of
                        building elements from it
        db           -- database to query, the main database by default
        loadSpec     -- attach the spec of each element as 'WMSpec'
        """
        if not db:
            db = self.db
        # RequestName is accepted as an alias of WorkflowName
        if not WorkflowName and elementFilters.get('RequestName'):
            WorkflowName = elementFilters.pop('RequestName')

        if elementIDs:
            if elementFilters or status or returnIdOnly:
                raise ValueError("Can't specify extra filters (or return id's) when using element id's with getElements()")
            elements = []
            for elementId in elementIDs:
                elements.append(CouchWorkQueueElement(db, elementId).load())
        else:
            # pick the view (and its lookup key) to narrow the query with:
            # workflow first, then status, then subscription
            if WorkflowName:
                viewName, viewKeys = 'elementsByWorkflow', [WorkflowName]
            elif status:
                viewName, viewKeys = 'elementsByStatus', [status]
            elif elementFilters.get('SubscriptionId'):
                viewName, viewKeys = 'elementsBySubscription', [elementFilters['SubscriptionId']]
            else:
                viewName, viewKeys = 'elementsByWorkflow', []

            # the explicit arguments are also sent along as plain filters
            if status:
                elementFilters['Status'] = status
            if WorkflowName:
                elementFilters['RequestName'] = WorkflowName

            listOptions = {'include_docs' : True, 'filter' : elementFilters,
                           'idOnly' : returnIdOnly, 'reduce' : False}
            rows = json.loads(db.loadList('WorkQueue', 'filter', viewName, listOptions, viewKeys))
            if returnIdOnly:
                return rows
            elements = [CouchWorkQueueElement.fromDocument(db, row) for row in rows]

        if loadSpec:
            specCache = {}  # several elements may share one spec
            for element in elements:
                requestName = element['RequestName']
                if requestName not in specCache:
                    specCache[requestName] = self.getWMSpec(requestName)
                element['WMSpec'] = specCache[requestName]
        return elements

    def getInboxElements(self, *args, **kwargs):
        """
        Same as getElements(), but always queries the inbox database.

        All positional and keyword arguments are forwarded unchanged.
        """
        inbox = self.inbox
        return self.getElements(db = inbox, *args, **kwargs)

    def getElementsForWorkflow(self, workflow):
        """
        Return the main-database elements of the given workflow.

        Looks the workflow up in the 'elementsByWorkflow' view (unreduced,
        with documents) and builds one element per returned row.
        """
        viewOptions = {'key' : workflow, 'include_docs' : True, 'reduce' : False}
        rows = self.db.loadView('WorkQueue', 'elementsByWorkflow', viewOptions).get('rows', [])
        found = []
        for row in rows:
            found.append(CouchWorkQueueElement.fromDocument(self.db, row['doc']))
        return found

    def getElementsForParent(self, parent):
        """
        Return the main-database elements whose parent is the given element.

        The lookup key in the 'elementsByParent' view is parent.id.
        """
        viewOptions = {'key' : parent.id, 'include_docs' : True}
        rows = self.db.loadView('WorkQueue', 'elementsByParent', viewOptions).get('rows', [])
        children = []
        for row in rows:
            children.append(CouchWorkQueueElement.fromDocument(self.db, row['doc']))
        return children

    def saveElements(self, *elements):
        """
        Persist the given elements in one commit.

        Every element is saved (queued) and the queue is committed through
        the couch handle of the first element.  Failures reported in the
        reply are logged as errors, not raised.

        Returns the elements that were saved successfully (an empty list
        when called without elements); the caller has to compare against its
        input to detect failures.
        """
        if not elements:
            return []
        for item in elements:
            item.save()
        reply = elements[0]._couch.commit()
        saved, failed = formatReply(reply, *elements)
        for failure in failed:
            self.logger.error('Couch error saving element: "%s", error "%s", reason "%s"'
                              % (failure['id'], failure['error'], failure['reason']))
        return saved

    def updateElements(self, *elementIds, **updatedParams):
        """
        Update the elements with the given ids in the main database.

        The keyword arguments are the new parameter values.  A keyword named
        'options' is not treated as a parameter: its dict is sent separately
        as the options of the update.  One PUT request per id is issued
        against the 'in-place' update handler of the WorkQueue design
        document, with both dicts JSON-encoded in the query string.
        """
        if not elementIds:
            return
        extraOptions = {}
        if "options" in updatedParams:
            extraOptions.update(updatedParams.pop("options"))
        # the query string is the same for every element, build it once
        query = urllib.urlencode({"updates" : json.dumps(updatedParams),
                                  "options" : json.dumps(extraOptions)})
        handler = "/" + self.db.name + "/_design/WorkQueue/_update/in-place/"
        for elementId in elementIds:
            self.db.makeRequest(uri = handler + elementId + "?" + query, type = 'PUT')
        return


    def updateInboxElements(self, *elementIds, **updatedParams):
        """
        Update the elements with the given ids in the inbox database.

        The keyword arguments are the new parameter values.  A keyword named
        'options' is not treated as a parameter: its dict is sent separately
        as the options of the update.  One PUT request per id is issued
        against the 'in-place' update handler of the WorkQueue design
        document, with both dicts JSON-encoded in the query string.
        """
        handler = "/" + self.inbox.name + "/_design/WorkQueue/_update/in-place/"
        extraOptions = {}
        if "options" in updatedParams:
            extraOptions.update(updatedParams.pop("options"))
        payload = {"updates" : json.dumps(updatedParams),
                   "options" : json.dumps(extraOptions)}
        for elementId in elementIds:
            target = handler + elementId + "?" + urllib.urlencode(payload)
            self.inbox.makeRequest(uri = target, type = 'PUT')
        return


    def deleteElements(self, *elements):
        """
        Delete the given elements, and their specs once unused.

        The deletions are queued per element and committed through the couch
        handle of the first element.  A reported failure is only logged when
        the document still exists afterwards.  Finally, for every request
        name seen, the document stored under that name in the main database
        is deleted if the 'elementsByWorkflow' view has no row left for it;
        a missing document is ignored.
        """
        if not elements:
            return
        workflows = {}  # request names touched by this call
        for element in elements:
            element.delete()
            workflows[element['RequestName']] = None
        couch = elements[0]._couch
        reply = couch.commit()
        result, failures = formatReply(reply, *elements)
        for failure in failures:
            # only count delete as failed if document still exists
            if couch.documentExists(failure['id']):
                self.logger.error('Couch error deleting element: "%s", error "%s", reason "%s"'
                                  % (failure['id'], failure['error'], failure['reason']))
        # delete specs if no longer used
        for name in workflows:
            try:
                remaining = self.db.loadView('WorkQueue', 'elementsByWorkflow',
                                             {'key' : name, 'limit' : 1, 'reduce' : False})['rows']
                if not remaining:
                    self.db.delete_doc(name)
            except CouchNotFoundError:
                pass


    def availableWork(self, thresholds, siteJobCounts, teams = None, wfs = None):
        """
        Get work which is available to be run

        Assume thresholds is a dictionary; keys are the site name, values are
        the maximum number of running jobs at that site.

        Assumes siteJobCounts is a dictionary-of-dictionaries; keys are the site
        name and task priorities.  The value is the number of jobs running at that
        priority.

        teams and wfs, when given, are passed to the couch list function as
        the 'teams' and 'wfs' options; wfs is sent in slices of 20 names per
        request.

        Returns the tuple (elements, thresholds, siteJobCounts).  elements is
        sorted by priority (highest first) and, within a priority, by
        creation time.  siteJobCounts is updated in place with the jobs of
        every element that was accepted.
        """
        self.logger.info("Getting available work from %s/%s" % 
                         (sanitizeURL(self.server.url)['url'], self.db.name))
        elements = []

        # We used to pre-filter sites, looking to see if there are idle job slots
        # We don't do this anymore, as we may over-allocate
        # jobs to sites if the new jobs have a higher priority.

        # If there are no sites, punt early.
        if not thresholds:
            self.logger.error("No thresholds is set: Please check")
            return elements, thresholds, siteJobCounts

        options = {}
        options['include_docs'] = True
        options['descending'] = True
        options['resources'] = thresholds
        if teams:
            options['teams'] = teams
            self.logger.info("setting teams %s" % teams)
        if wfs:
            result = []
            # query in slices of 20 workflow names and merge the answers
            for i in xrange(0, len(wfs), 20):
                options['wfs'] = wfs[i:i+20]
                data = self.db.loadList('WorkQueue', 'workRestrictions', 'availableByPriority', options)
                result.extend(json.loads(data))
            # sort final list
            # NOTE(review): this sort is ascending by priority while the
            # single-query branch asks the view for descending order - confirm
            # which order the allocation loop below is meant to see.
            result.sort(key = lambda x: x['WMCore.WorkQueue.DataStructs.WorkQueueElement.WorkQueueElement']['Priority'])
        else:
            result = self.db.loadList('WorkQueue', 'workRestrictions', 'availableByPriority', options)
            result = json.loads(result)
            if len(result) == 0:
                self.logger.info("""No available work in WQ or didn't pass workqueue restriction 
                                    - check Pileup, site white list, etc""")
            self.logger.debug("Available Work:\n %s \n for resources\n %s" % (result, thresholds))
        # Iterate through the results; apply whitelist / blacklist / data
        # locality restrictions.  Only assign jobs if they are high enough
        # priority.
        for i in result:
            element = CouchWorkQueueElement.fromDocument(self.db, i)
            prio = element['Priority']

            possibleSite = None
            # sites are tried in random order; the first acceptable one wins
            sites = thresholds.keys()
            random.shuffle(sites)
            for site in sites:
                if element.passesSiteRestriction(site):
                    # Count the number of jobs currently running of greater priority
                    prio = element['Priority']
                    # jobs recorded at a priority >= this element's count against the site
                    curJobCount = sum(map(lambda x : x[1] if x[0] >= prio else 0, siteJobCounts.get(site, {}).items()))
                    self.logger.debug("Job Count: %s, site: %s threshods: %s" % (curJobCount, site, thresholds[site]))
                    if curJobCount < thresholds[site]:
                        possibleSite = site
                        break

            if possibleSite:
                self.logger.debug("Possible site exists %s" % str(possibleSite))
                elements.append(element)
                # 'site' still holds the chosen site here: the loop above was
                # left via break right after possibleSite was set to it
                if site not in siteJobCounts:
                    siteJobCounts[site] = {}
                # book the element's jobs (scaled by blowupFactor, default 1.0)
                # under its priority so later elements see the slots as taken
                siteJobCounts[site][prio] = siteJobCounts[site].setdefault(prio, 0) + element['Jobs']*element.get('blowupFactor', 1.0)
            else:
                self.logger.info("No possible site for %s" % element['RequestName'])
        # sort elements to get them in priority first and timestamp order
        # (two passes: the second sort is stable, so creation-time order is
        # kept among elements of equal priority)
        elements.sort(key=lambda element: element['CreationTime'])
        elements.sort(key = lambda x: x['Priority'], reverse = True)
        
        return elements, thresholds, siteJobCounts

    def getActiveData(self):
        """
        Return the data items the queue holds work for.

        Each row key of the grouped 'activeData' view is turned into a dict
        {'dbs_url': key[0], 'name': key[1]}.
        """
        grouped = self.db.loadView('WorkQueue', 'activeData', {'reduce' : True, 'group' : True})
        items = []
        for row in grouped.get('rows', []):
            items.append({'dbs_url' : row['key'][0], 'name' : row['key'][1]})
        return items

    def getActiveParentData(self):
        """
        Return the data items the queue holds work with parents for.

        Each row key of the grouped 'activeParentData' view is turned into a
        dict {'dbs_url': key[0], 'name': key[1]}.
        """
        grouped = self.db.loadView('WorkQueue', 'activeParentData', {'reduce' : True, 'group' : True})
        items = []
        for row in grouped.get('rows', []):
            items.append({'dbs_url' : row['key'][0], 'name' : row['key'][1]})
        return items

    def getActivePileupData(self):
        """
        Return the data items the queue holds work with pileup for.

        Each row key of the grouped 'activePileupData' view is turned into a
        dict {'dbs_url': key[0], 'name': key[1]}.
        """
        grouped = self.db.loadView('WorkQueue', 'activePileupData', {'reduce' : True, 'group' : True})
        items = []
        for row in grouped.get('rows', []):
            items.append({'dbs_url' : row['key'][0], 'name' : row['key'][1]})
        return items

    def getElementsForData(self, dbs, data):
        """
        Return the elements found under the given data name in the
        'elementsByData' view.

        Only data is used as the lookup key; the dbs argument is accepted
        but not used by this method.
        """
        viewOptions = {'key' : data, 'include_docs' : True}
        rows = self.db.loadView('WorkQueue', 'elementsByData', viewOptions).get('rows', [])
        matches = []
        for row in rows:
            matches.append(CouchWorkQueueElement.fromDocument(self.db, row['doc']))
        return matches

    def getElementsForParentData(self, data):
        """
        Return the elements found under the given data name in the
        'elementsByParentData' view.
        """
        viewOptions = {'key' : data, 'include_docs' : True}
        rows = self.db.loadView('WorkQueue', 'elementsByParentData', viewOptions).get('rows', [])
        matches = []
        for row in rows:
            matches.append(CouchWorkQueueElement.fromDocument(self.db, row['doc']))
        return matches

    def getElementsForPileupData(self, data):
        """
        Return the elements found under the given data name in the
        'elementsByPileupData' view.
        """
        viewOptions = {'key' : data, 'include_docs' : True}
        rows = self.db.loadView('WorkQueue', 'elementsByPileupData', viewOptions).get('rows', [])
        matches = []
        for row in rows:
            matches.append(CouchWorkQueueElement.fromDocument(self.db, row['doc']))
        return matches

    def isAvailable(self):
        """
        Tell whether the main database can be used right now.

        Returns False (with an info message) while the database reports a
        running compaction, False (with an error message) when asking for
        the database info fails in any way, and True otherwise.
        """
        try:
            if self.db.info()['compact_running']:
                self.logger.info("CouchDB compacting - try again later.")
                return False
            return True
        except Exception as error:
            self.logger.error("CouchDB unavailable: %s" % str(error))
            return False

    def getWorkflows(self, includeInbox = False, includeSpecs = False):
        """
        Return the names of the workflows known to the workqueue.

        The names are the row keys of the grouped 'elementsByWorkflow' view
        of the main database; includeInbox adds those of the inbox and
        includeSpecs adds the keys of the 'specsByWorkflow' view.  Duplicates
        are removed; the order of the returned list is not defined.
        """
        names = set()
        elementRows = self.db.loadView('WorkQueue', 'elementsByWorkflow', {'group' : True})['rows']
        names.update(row['key'] for row in elementRows)
        if includeInbox:
            inboxRows = self.inbox.loadView('WorkQueue', 'elementsByWorkflow', {'group' : True})['rows']
            names.update(row['key'] for row in inboxRows)
        if includeSpecs:
            specRows = self.db.loadView('WorkQueue', 'specsByWorkflow')['rows']
            names.update(row['key'] for row in specRows)
        return list(names)

    def queueLength(self):
        """
        Return the number of available elements, taken from the
        'total_rows' figure of the 'availableByPriority' view (queried with
        limit 0, so no rows are fetched).
        """
        summary = self.db.loadView('WorkQueue', 'availableByPriority', {'limit' : 0})
        return summary['total_rows']

    def fixConflicts(self):
        """
        Resolve elements that are in conflict.

        Each local queue runs this to resolve its conflicts with global,
        resolution propagates up to global.

        For every row of the 'conflicts' view (inbox first, then the main
        database) all listed revisions are loaded and merged into one
        element; the merged element is saved and, if that save is accepted,
        the remaining ones are saved as well (which deletes them).

        This will fail if elements are modified during the resolution -
        rerun in that case.  Errors are logged per element and do not stop
        the processing of the other conflicts.
        """
        for database in (self.inbox, self.db):
            conflictRows = database.loadView('WorkQueue', 'conflicts')['rows']
            for conflict in conflictRows:
                docId = conflict['id']
                try:
                    revisions = []
                    for revision in conflict['value']:
                        document = database.document(docId, revision)
                        revisions.append(CouchWorkQueueElement.fromDocument(database, document))
                    resolved = fixElementConflicts(*revisions)
                    mergedSaved = self.saveElements(resolved[0])
                    if mergedSaved:
                        # delete others (if merged value update accepted)
                        self.saveElements(*resolved[1:])
                except Exception as error:
                    self.logger.error("Error resolving conflict for %s: %s" % (docId, str(error)))

    def recordTaskActivity(self, taskname, comment = ''):
        """
        Record that a task ran, for monitoring purposes.

        The 'task_activity' document of the main database (created if it is
        not found) gets, under tasks[taskname], the current time as
        'timestamp' and the given comment.  A failure to commit the document
        is logged as an error, not raised.
        """
        try:
            activity = self.db.document('task_activity')
        except CouchNotFoundError:
            activity = Document('task_activity')
        tasks = activity.setdefault('tasks', {})
        entry = tasks.setdefault(taskname, {})
        entry['timestamp'] = time.time()
        entry['comment'] = comment
        try:
            self.db.commitOne(activity)
        except Exception as error:
            self.logger.error("Unable to update task %s freshness: %s" % (taskname, str(error)))

    def getWMBSInjectStatus(self, request = None):
        """
        Report whether requests are fully injected into WMBS.

        This service is only provided by the global queue.

        A request counts as injected when the 'wmbsInjectStatusByRequest'
        view says so AND its inbox element is no longer open for new data.

        request -- when given, return the status of that single request;
                   WorkQueueNoMatchingElements is raised if the view has no
                   row for it.
                   When omitted, return a list of single-entry dicts
                   {request name: True/False}, one per inbox element of the
                   requests known to the view (empty list if there are none).
        """
        options = {'group' : True}
        if request:
            options.update(key = request)
        data = self.db.loadView('WorkQueue', 'wmbsInjectStatusByRequest',
                                options)
        if request:
            if not data['rows']:
                raise WorkQueueNoMatchingElements("%s not found" % request)
            row = data['rows'][0]
            injectionStatus = row['value']
            inboxElement = self.getInboxElements(elementIDs = [row['key']])
            return injectionStatus and not inboxElement[0].get('OpenForNewData', False)

        injectionStatus = dict((x['key'], x['value']) for x in data.get('rows', []))
        if not injectionStatus:
            # An empty id list would make getInboxElements() fall back to an
            # unfiltered query returning every inbox element, none of which
            # has an entry in injectionStatus - there is nothing to report.
            return []

        inboxElements = self.getInboxElements(elementIDs = list(injectionStatus))
        finalInjectionStatus = []
        for element in inboxElements:
            injected = not element.get('OpenForNewData', False) and injectionStatus[element._id]
            finalInjectionStatus.append({element._id : bool(injected)})
        return finalInjectionStatus
        
    def getWorkflowNames(self, inboxFlag = False):
        """
        Return the workflow names that have elements in the queue.

        The names are the row keys of the grouped 'elementsByWorkflow' view,
        read from the inbox database when inboxFlag is true and from the main
        workqueue database otherwise.  The view is requested with
        stale="update_after".
        """
        database = self.inbox if inboxFlag else self.db
        grouped = database.loadView('WorkQueue', 'elementsByWorkflow',
                                    {'stale': "update_after", 'reduce' : True, 'group' : True})
        workflowNames = []
        for row in grouped.get('rows', []):
            workflowNames.append(row['key'])
        return workflowNames
    
    def deleteWQElementsByWorkflow(self, workflowNames):
        """
        Delete the workqueue elements that belong to the given workflows.

        workflowNames is a workflow name or a list of names.  The element
        ids are collected from the 'elementsByWorkflow' view of the main
        database and of the inbox and bulk-deleted; then the document stored
        under each workflow name (the spec) is removed from the main
        database.

        No check of the workflow status is made, so use with care: pass only
        workflows that have reached an end status.

        Returns the number of element ids handed over for deletion.
        """
        names = workflowNames if isinstance(workflowNames, list) else [workflowNames]
        if not names:
            return 0

        viewOptions = {"stale": "update_after", "reduce": False}
        removed = 0
        for couchdb in (self.db, self.inbox):
            matches = couchdb.loadView("WorkQueue", "elementsByWorkflow", viewOptions, names)
            docIds = [entry["id"] for entry in matches["rows"]]
            if not docIds:
                continue
            couchdb.bulkDeleteByIDs(docIds)
            removed += len(docIds)

        # delete the workflow with spec from workqueue db
        for workflow in names:
            self.db.delete_doc(workflow)
        return removed
Esempio n. 6
0
class CMSCouchTest(unittest.TestCase):

    test_counter = 0

    def setUp(self):
        """Connect to the couch server and (re)create an empty database
        named after the running test."""
        couchUrl = os.getenv("COUCHURL", 'http://*****:*****@localhost:5984')
        self.server = CouchServer(couchUrl)
        self.testname = self.id().split('.')[-1]

        dbname = 'cmscouch_unittest_%s' % self.testname.lower()
        # start from a clean slate: drop whatever an earlier run left behind
        leftover = dbname in self.server.listDatabases()
        if leftover:
            self.server.deleteDatabase(dbname)
        self.server.createDatabase(dbname)
        self.db = self.server.connectDatabase(dbname)

    def tearDown(self):
        """
        Delete the per-test database unless an exception is being handled.

        The database is left in place after a failure so that it can be
        inspected.
        """
        # identity check: None is a singleton, '==' is the wrong comparison
        if sys.exc_info()[0] is None:
            # This test has passed, clean up after it
            dbname = 'cmscouch_unittest_%s' % self.testname.lower()
            self.server.deleteDatabase(dbname)

    def testCommitOne(self):
        """commitOne accepts a plain dict as well as a Document."""
        payload = {'foo':123, 'bar':456}
        # a plain dict first ...
        dictId = self.db.commitOne(payload)[0]['id']
        # ... then the same content wrapped in a Document
        wrapped = Document(inputDict = payload)
        documentId = self.db.commitOne(wrapped)[0]['id']

    def testCommitOneWithQueue(self):
        """
        commitOne writes its document immediately without touching the
        queue; documents queued earlier must still be written by the next
        commit().
        """
        payload = {'foo':123, 'bar':456}
        # Queue up five docs
        for _ in range(5):
            self.db.queue(payload)
        # Commit one Document - only this one may show up in the database
        committedId = self.db.commitOne(Document(inputDict = payload))[0]['id']
        rowsAfterCommitOne = self.db.allDocs()['rows']
        self.assertEqual(1, len(rowsAfterCommitOne))
        # flushing the queue adds the five queued docs
        self.db.commit()
        rowsAfterCommit = self.db.allDocs()['rows']
        self.assertEqual(6, len(rowsAfterCommit))

    def testTimeStamping(self):
        """A document committed with timestamp=True carries a 'timestamp' key."""
        reply = self.db.commitOne({'foo':123, 'bar':456}, timestamp=True)
        stored = self.db.document(reply[0]['id'])
        hasTimestamp = 'timestamp' in stored.keys()
        self.assertTrue(hasTimestamp)

    def testDeleteDoc(self):
        """delete_doc removes a committed document right away."""
        self.db.commitOne({'foo':123, 'bar':456})
        rows = self.db.allDocs()['rows']
        self.assertEqual(1, len(rows))

        # The db.delete_doc is immediate
        self.db.delete_doc(rows[0]['id'])
        remaining = self.db.allDocs()['rows']
        self.assertEqual(0, len(remaining))

    def testDeleteQueuedDocs(self):
        """Deletions queued with queueDelete only take effect on commit()."""
        for payload in ({'foo':123, 'bar':456}, {'foo':789, 'bar':101112}):
            self.db.queue(payload)
        self.db.commit()

        rows = self.db.allDocs()['rows']
        self.assertEqual(2, len(rows))
        for row in rows:
            self.db.queueDelete(self.db.document(row['id']))
        # nothing has been committed yet, both documents are still there
        self.assertEqual(2, len(self.db.allDocs()['rows']))

        self.db.commit()

        self.assertEqual(0, len(self.db.allDocs()['rows']))

    def testReplicate(self):
        """A document committed to the test database shows up unchanged in
        a replica after replicating."""
        replica = self.server.connectDatabase(self.db.name + 'repl')

        docId = self.db.commitOne({'foo':123}, timestamp=True)[0]['id']
        original = self.db.document(docId)

        #replicate
        self.server.replicate(self.db.name, replica.name)

        # give the replication a moment to be triggered
        time.sleep(1)
        sourceDoc = self.db.document(docId)
        replicatedDoc = replica.document(docId)
        self.assertEqual(sourceDoc, replicatedDoc)
        self.server.deleteDatabase(replica.name)

    def testSlashInDBName(self):
        """
        Slashes are a valid character in a database name, and are useful as it
        creates a directory structure for the couch data files.

        The database is created, then reached three ways (the handle returned
        by createDatabase, connectDatabase, a directly constructed Database);
        each must report the slash-containing name.
        """
        db_name = 'wmcore/unittests'
        try:
            self.server.deleteDatabase(db_name)
        except CouchNotFoundError:
            # Ignore this - the database shouldn't already exist
            pass

        # unittest assertions instead of bare asserts: they are not stripped
        # under "python -O" and report both values on failure
        db = self.server.createDatabase(db_name)
        self.assertEqual(db.info()['db_name'], db_name)

        db = self.server.connectDatabase(db_name)
        self.assertEqual(db.info()['db_name'], db_name)

        # same url fallback as setUp, so an unset COUCHURL is not a KeyError
        couchUrl = os.getenv("COUCHURL", 'http://*****:*****@localhost:5984')
        db = Database(db_name, url = couchUrl)
        self.assertEqual(db.info()['db_name'], db_name)

        self.server.deleteDatabase(db_name)

    def testInvalidName(self):
        """
        Capital letters are not allowed in database names: every way of
        naming a database must reject such a name with a ValueError.
        """
        db_name = 'Not A Valid Name'
        entryPoints = (self.server.createDatabase,
                       self.server.deleteDatabase,
                       self.server.connectDatabase,
                       Database)
        for entryPoint in entryPoints:
            self.assertRaises(ValueError, entryPoint, db_name)

    def testDocumentSerialisation(self):
        """
        A Document committed with a timestamp reads back from couch with the
        same 'foo' and 'timestamp' values as the local object.
        """
        original = Document()
        original['foo'] = 'bar'
        info = self.db.commit(doc=original, timestamp=True)[0]
        stored = self.db.document(info['id'])
        for field in ('foo', 'timestamp'):
            self.assertEqual(original[field], stored[field])

    def testAttachments(self):
        """
        Upload attachments with and without checksumming; an upload with a
        wrong checksum must be rejected.
        """
        doc = self.db.commitOne({'foo':'bar'}, timestamp=True)[0]

        # base64 of the md5 digest, used as explicit checksum for "Goodbye"
        goodbye = "Goodbye"
        goodbyeChecksum = base64.b64encode(hashlib.md5(goodbye).digest())

        #TODO: add a binary attachment - e.g. tar.gz
        # each upload returns the new id/rev pair needed for the next one
        uploads = [
            ("Hello", {}),
            ("How are you today?", {'contentType': "foo/bar"}),
            ("I'm very well, thanks for asking", {'name': "my_greeting"}),
            ("Lovely weather we're having", {'add_checksum': True}),
            (goodbye, {'checksum': goodbyeChecksum}),
        ]
        for content, extra in uploads:
            doc = self.db.addAttachment(doc['id'], doc['rev'], content, **extra)

        self.assertRaises(CouchInternalServerError, self.db.addAttachment,
                          doc['id'], doc['rev'], "Good day to you, sir!", checksum='123')

    def testRevisionHandling(self):
        """
        Create a conflict between the test database and a replica, resolve
        it by deleting the conflicting revision, then check which document
        revisions can still be found before and after compaction.
        """
        # This test won't work from an existing database, conflicts will be preserved, so
        # ruthlessly remove the databases to get a clean slate.
        try:
            self.server.deleteDatabase(self.db.name)
        except CouchNotFoundError:
            pass # Must have been deleted already

        try:
            self.server.deleteDatabase(self.db.name + 'repl')
        except CouchNotFoundError:
            pass # Must have been deleted already

        # I'm going to create a conflict, so need a replica db
        self.db = self.server.connectDatabase(self.db.name)
        repl_db = self.server.connectDatabase(self.db.name + 'repl')

        # revision 1 of the document, remembered for the checks at the end
        doc_id = self.db.commitOne({'foo':123}, timestamp=True)[0]['id']
        doc_v1 = self.db.document(doc_id)

        #replicate
        self.server.replicate(self.db.name, repl_db.name)
        time.sleep(1)

        # revision 2 is written to the source database only ...
        doc_v2 = self.db.document(doc_id)
        doc_v2['bar'] = 456
        doc_id_rev2 = self.db.commitOne(doc_v2)[0]
        doc_v2 = self.db.document(doc_id)

        #now update the replica
        # ... while the replica gets a different change on top of revision 1
        conflict_doc = repl_db.document(doc_id)
        conflict_doc['bar'] = 101112
        repl_db.commitOne(conflict_doc)

        #replicate, creating the conflict
        self.server.replicate(self.db.name, repl_db.name)
        time.sleep(1)

        # temporary view listing every document that has conflicting revisions
        conflict_view = {'map':"function(doc) {if(doc._conflicts) {emit(doc._conflicts, null);}}"}
        data = repl_db.post('/%s/_temp_view' % repl_db.name, conflict_view)

        # Should have one conflict in the repl database
        self.assertEqual(data['total_rows'], 1)
        # Should have no conflicts in the source database
        self.assertEqual(self.db.post('/%s/_temp_view' % self.db.name, conflict_view)['total_rows'], 0)
        self.assertTrue(repl_db.documentExists(data['rows'][0]['id'], rev=data['rows'][0]['key'][0]))

        # deleting the conflicting revision resolves the conflict
        repl_db.delete_doc(data['rows'][0]['id'], rev=data['rows'][0]['key'][0])
        data = repl_db.post('/%s/_temp_view' % repl_db.name, conflict_view)

        self.assertEqual(data['total_rows'], 0)
        self.server.deleteDatabase(repl_db.name)

        #update it again
        doc_v3 = self.db.document(doc_id)
        doc_v3['baz'] = 789
        doc_id_rev3 = self.db.commitOne(doc_v3)[0]
        doc_v3 = self.db.document(doc_id)

        #test that I can pull out an old revision
        doc_v1_test = self.db.document(doc_id, rev=doc_v1['_rev'])
        self.assertEqual(doc_v1, doc_v1_test)

        #test that I can check a revision exists
        self.assertTrue(self.db.documentExists(doc_id, rev=doc_v2['_rev']))

        # a made-up revision id must not be reported as existing
        self.assertFalse(self.db.documentExists(doc_id, rev='1'+doc_v2['_rev']))

        #why you shouldn't rely on rev
        # after compaction only the latest revision is expected to be found
        self.db.compact(blocking=True)
        self.assertFalse(self.db.documentExists(doc_id, rev=doc_v1['_rev']))
        self.assertFalse(self.db.documentExists(doc_id, rev=doc_v2['_rev']))
        self.assertTrue(self.db.documentExists(doc_id, rev=doc_v3['_rev']))

    def testCommit(self):
        """
        Test queue and commit modes: plain commit, commit of two documents
        sharing an id, all_or_nothing commit and commit with a conflict callback.
        """
        # try to commit 2 random docs
        doc = {'foo':123, 'bar':456}
        self.db.queue(doc)
        self.db.queue(doc)
        self.assertEqual(2, len(self.db.commit()))

        # committing 2 docs with the same id will fail for the second one
        self.db.queue(Document(id = "1", inputDict = {'foo':123, 'bar':456}))
        self.db.queue(Document(id = "1", inputDict = {'foo':1234, 'bar':456}))
        answer = self.db.commit()
        self.assertEqual(2, len(answer))
        self.assertEqual(answer[0]['ok'], True)
        self.assertEqual(answer[1]['error'], 'conflict')

        # all_or_nothing mode ignores conflicts
        self.db.queue(Document(id = "2", inputDict = doc))
        self.db.queue(Document(id = "2", inputDict = {'foo':1234, 'bar':456}))
        answer = self.db.commit(all_or_nothing = True)
        self.assertEqual(2, len(answer))
        # neither of the two results may carry an error
        # (the second check used to re-test answer[0] by mistake)
        self.assertEqual(answer[0].get('error'), None)
        self.assertEqual(answer[1].get('error'), None)
        self.assertEqual(answer[0]['id'], '2')
        self.assertEqual(answer[1]['id'], '2')

        # callbacks can do stuff when conflicts arise
        # this particular one just overwrites the document
        def callback(db, data, result):
            # re-commit the queued document matching the reported id,
            # using the revision currently stored in the database
            for queued in data['docs']:
                if queued['_id'] == result['id']:
                    queued['_rev'] = db.document(queued['_id'])['_rev']
                    retval = db.commitOne(queued)
            return retval[0]

        self.db.queue(Document(id = "2", inputDict = {'foo':5, 'bar':6}))
        answer = self.db.commit(callback = callback)
        self.assertEqual(1, len(answer))
        self.assertEqual(answer[0].get('error'), None)
        updatedDoc = self.db.document('2')
        self.assertEqual(updatedDoc['foo'], 5)
        self.assertEqual(updatedDoc['bar'], 6)

    def testUpdateHandler(self):
        """
        Check that an update function stored in a design document can be
        invoked through updateDocument and that its change is persisted
        """
        # javascript handler: increments doc.counter and answers with a fixed string
        bump_counter_js = 'function(doc, req) {if (!doc.counter) {doc.counter = 0};doc.counter += 1;return [doc,"bumped it!"];}'
        design_doc = {
            '_id':'_design/foo',
            'language': 'javascript',
            'updates':{
                "bump-counter" : bump_counter_js,
            }
        }
        self.db.commit(design_doc)

        # store a document with a zeroed counter and remember its id
        counter_doc = {'foo': 123, 'counter': 0}
        committed = self.db.commit(counter_doc)
        counter_doc_id = committed[0]['id']

        # the handler reply is returned to the caller ...
        handler_reply = self.db.updateDocument(counter_doc_id, 'foo', 'bump-counter')
        self.assertEqual("bumped it!", handler_reply)

        # ... and the stored counter was bumped exactly once
        stored_doc = self.db.document(counter_doc_id)
        self.assertEqual(1, stored_doc['counter'])


    def testList(self):
        """
        Check loadList: error cases raise, and a list output that merely
        contains the word "error" is returned untouched
        """
        all_view = {'map' : 'function(doc) {emit(null, null) }'}
        list_functions = {
            'errorinoutput' : 'function(doc, req) {send("A string with the word error in")}',
            'malformed' : 'function(doc, req) {somethingtoraiseanerror}',
        }
        list_ddoc = {
            '_id':'_design/foo',
            'language': 'javascript',
            'views' : {'all' : all_view},
            'lists' : list_functions,
        }
        self.db.commit(list_ddoc)

        load_list = self.db.loadList
        # appropriate errors raised
        self.assertRaises(CouchNotFoundError, load_list, 'foo', 'error', 'view_doesnt_exist')
        self.assertRaises(CouchInternalServerError, load_list, 'foo', 'malformed', 'all')

        # error in list output string shouldn't raise an error
        list_output = self.db.loadList('foo', 'errorinoutput', 'all')
        self.assertEqual(list_output, "A string with the word error in")

    def testAllDocs(self):
        """
        Check allDocs with no arguments, with options, with keys and with both
        """
        # three documents with ids "1", "2" and "3" and identical content
        for doc_id in ("1", "2", "3"):
            self.db.queue(Document(id = doc_id, inputDict = {'foo':123, 'bar':456}))
        self.db.commit()

        everything = self.db.allDocs()['rows']
        self.assertEqual(3, len(everything))

        from_two = self.db.allDocs({'startkey': "2"})['rows']
        self.assertEqual(2, len(from_two))

        by_keys = self.db.allDocs(keys = ["1", "3"])['rows']
        self.assertEqual(2, len(by_keys))

        limited = self.db.allDocs({'limit':1}, ["1", "3"])['rows']
        self.assertEqual(1, len(limited))

        # asking for a key that was never stored gives a row carrying 'error'
        with_missing = self.db.allDocs(keys = ["1", "4"])['rows']
        self.assertTrue('error' in with_missing[1])

    def testUpdateBulkDocuments(self):
        """
        Check that updateBulkDocumentsWithConflictHandle changes the given
        field on every listed document
        """
        for doc_id in ("1", "2", "3"):
            self.db.queue(Document(id=doc_id, inputDict={'foo':123, 'bar':456}))
        self.db.commit()

        # (new 'foo' value, third positional argument handed to the bulk updater)
        rounds = ((333, 2), (222, 10))
        for new_foo, third_arg in rounds:
            self.db.updateBulkDocumentsWithConflictHandle(["1", "2", "3"], {'foo': new_foo}, third_arg)
            rows = self.db.allDocs({"include_docs": True})['rows']
            self.assertEqual(3, len(rows))
            for row in rows:
                self.assertEqual(new_foo, row['doc']['foo'])

    def testUpdateHandlerAndBulkUpdateProfile(self):
        """
        Time per-document updates through an update handler against one bulk
        update of the same documents. The timings are only printed, nothing
        is asserted.
        """
        # for actual test increase the size value: For 10000 records, 96 sec vs 4 sec
        size = 100
        doc_ids = ["%s" % i for i in range(size)]
        for doc_id in doc_ids:
            self.db.queue(Document(id=doc_id, inputDict={'name':123, 'counter':0}))

        # The handler copies every field of the JSON request body onto the
        # document. It must use bracket access: "doc.field = data.field" would
        # only ever touch a property literally named "field".
        update_doc = {
            '_id':'_design/foo',
            'language': 'javascript',
            'updates':{
                "change-counter" : """function(doc, req) { if (doc) { var data = JSON.parse(req.body);
                                      for (var field in data) {doc[field] = data[field];} return [doc, 'OK'];}}""",
            }
        }

        self.db.commit(update_doc)

        # one update-handler request per document
        start = time.time()
        for doc_id in doc_ids:
            self.db.updateDocument(doc_id, 'foo', 'change-counter', {'counter': 1}, useBody=True)
        end = time.time()

        print("update handler: %s sec" % (end - start))

        # a single bulk call for all documents; building the id list is part
        # of the timed section, as before
        start = time.time()
        ids = list(doc_ids)
        self.db.updateBulkDocumentsWithConflictHandle(ids, {'counter': 2}, 1000)
        end = time.time()

        print("bulk update: %s sec" % (end - start))
Esempio n. 7
0
class WMStatsWriter(WMStatsReader):
    """
    Write-side access to a WMStats couch database: uploads documents and
    calls the update handlers of the 'WMStats' design document.
    """

    def __init__(self, couchURL, dbName = None):
        """
        Connect to the couch database.

        :param couchURL: URL of the couch server when dbName is given;
                         otherwise a URL that splitCouchServiceURL splits
                         into server URL and database name
        :param dbName: name of the database, optional
        """
        # set the connection for local couchDB call
        if dbName:
            self.couchURL = couchURL
            self.dbName = dbName
        else:
            self.couchURL, self.dbName = splitCouchServiceURL(couchURL)
        self.couchServer = CouchServer(self.couchURL)
        self.couchDB = self.couchServer.connectDatabase(self.dbName, False)

    def uploadData(self, docs):
        """
        Queue the given document(s) and commit them in one go.

        :param docs: a single dict-like document or an iterable of documents
        :return: whatever the database commit returns (called with returndocs=True)
        """
        # add delete docs as well for the compaction
        # need to check whether delete and update is successful
        # isinstance (not an exact type check) so that dict subclasses are
        # treated as one document instead of being iterated key by key
        if isinstance(docs, dict):
            docs = [docs]
        for doc in docs:
            self.couchDB.queue(doc)
        return self.couchDB.commit(returndocs = True)

    def insertRequest(self, schema):
        """
        Build a monitoring document from a request schema and insert it.
        """
        doc = monitorDocFromRequestSchema(schema)
        return self.insertGenericRequest(doc)

    def insertGenericRequest(self, doc):
        """
        Insert a request document through the 'insertRequest' update handler
        and then set its status to "new".

        :param doc: request document; must contain '_id'
        :return: result of the 'insertRequest' update call
        """
        result = self.couchDB.updateDocument(doc['_id'], 'WMStats',
                                    'insertRequest',
                                    fields={'doc': JSONEncoder().encode(doc)})
        self.updateRequestStatus(doc['_id'], "new")
        return result

    def updateRequestStatus(self, request, status):
        """
        Record a status together with the current time (seconds since epoch)
        on the given request document.
        """
        statusTime = {'status': status, 'update_time': int(time.time())}
        return self.couchDB.updateDocument(request, 'WMStats', 'requestStatus',
                    fields={'request_status': JSONEncoder().encode(statusTime)})

    def updateTeam(self, request, team):
        """
        Pass the team to the 'team' update handler for the given request.
        """
        return self.couchDB.updateDocument(request, 'WMStats', 'team',
                                         fields={'team': team})

    def insertTotalStats(self, request, totalStats):
        """
        update the total stats of given workflow (total_jobs, input_events, input_lumis, input_num_files)
        """
        return self.couchDB.updateDocument(request, 'WMStats', 'totalStats',
                                         fields=totalStats)

    def updateFromWMSpec(self, spec):
        """
        Update priority and site white list of a request from its spec.
        """
        # currently only update priority and siteWhitelist
        # complex field needs to be JSON encoded
        # assuming all the toplevel tasks has the same site white lists
        # priority is priority + user priority + group priority
        fields = {'priority': spec.priority(),
                  'site_white_list': spec.getTopLevelTask()[0].siteWhitelist()}
        return self.couchDB.updateDocument(spec.name(), 'WMStats',
                    'generalFields',
                    fields={'general_fields': JSONEncoder().encode(fields)})

    def updateRequestsInfo(self, docs):
        """
        bulk update for request documents.
        TODO: change to bulk update handler when it gets supported

        :param docs: iterable of dicts, each containing 'workflow'; a 'type'
                     entry, if present, is not sent to the update handler
        """
        for doc in docs:
            # send a copy without 'type' so the caller's dict is left intact
            # and a document lacking 'type' does not raise KeyError
            generalFields = dict((key, value) for key, value in doc.items()
                                 if key != 'type')
            self.couchDB.updateDocument(doc['workflow'], 'WMStats',
                        'generalFields',
                        fields={'general_fields': JSONEncoder().encode(generalFields)})

    def updateAgentInfo(self, agentInfo):
        """
        Pass the agent information to the 'agentInfo' update handler;
        agentInfo must contain '_id'.
        """
        return self.couchDB.updateDocument(agentInfo['_id'], 'WMStats',
                        'agentInfo',
                        fields={'agent_info': JSONEncoder().encode(agentInfo)})

    def deleteOldDocs(self, days):
        """
        delete the documents from wmstats db older than param 'days'

        :param days: age threshold in days (may be fractional)
        :return: {'delete': <count>, 'error': {<error name>: <count>}} when
                 the commit returned results, otherwise the string "nothing"
        """
        sec = int(days * 24 * 60 * 60)
        threshold = int(time.time()) - sec
        # descending from the threshold key of the WMStats/time view
        options = {"startkey": threshold, "descending": True,
                   "stale": "update_after"}
        result = self.couchDB.loadView("WMStats", "time", options)

        for row in result['rows']:
            # id and revision are taken from the view value
            doc = {'_id': row['value']['id'],
                   '_rev': row['value']['rev']}
            self.couchDB.queueDelete(doc)
        committed = self.couchDB.commit()

        if not committed:
            return "nothing"

        # count successful deletions and group failures by error name
        errorReport = {}
        deleted = 0
        for data in committed:
            if 'error' in data:
                errorReport[data['error']] = errorReport.get(data['error'], 0) + 1
            else:
                deleted += 1
        return {'delete': deleted, 'error': errorReport}

    def replicate(self, target):
        """
        Start a continuous, filtered ('WMStats/repfilter') replication of
        this database to target.
        """
        return self.couchServer.replicate(self.dbName, target, continuous = True,
                                   filter = 'WMStats/repfilter', useReplicator = True)

    def getDBInstance(self):
        """
        Return the underlying database object.
        """
        return self.couchDB
Esempio n. 8
0
# Archive a pair of couch databases ("<base>/jobs" and "<base>/fwjrs") from a
# source couch host to a destination couch host.
# The destination host and database base name come from the command line.
# NOTE(review): srcCouchHost and srcDbBase are used below but not defined in
# this excerpt - presumably set earlier in the script; confirm.
destCouchHost = sys.argv[1]
destDbBase = sys.argv[2]

destCouchServer = CouchServer(dburl = destCouchHost)
srcCouchServer = CouchServer(dburl = srcCouchHost)

# Full database URLs; "%2F" is the URL-encoded "/" inside the database name.
srcJobsDb = srcCouchHost + "/" + srcDbBase + "%2Fjobs"
destJobsDb = destCouchHost + "/" + destDbBase + "%2Fjobs"
srcFwjrsDb = srcCouchHost + "/" + srcDbBase + "%2Ffwjrs"
destFwjrsDb = destCouchHost + "/" + destDbBase + "%2Ffwjrs"

print "Archiving %s/%s to %s/%s..." % (srcCouchHost, srcDbBase, destCouchHost, destDbBase)

# Replicate the FWJR and Jobs databases; the replication is requested on the
# destination server with create_target = True.
print "  Replicating jobs database..."
destCouchServer.replicate(srcJobsDb, destJobsDb, create_target = True)
print "  Replication fwjrs database..."
destCouchServer.replicate(srcFwjrsDb, destFwjrsDb, create_target = True)

# Generate views for the various databases.
# From here on destJobsDb / destFwjrsDb are rebound from URL strings to
# database objects on the destination server.
destJobsDb = destCouchServer.connectDatabase(destDbBase + "/jobs")
destFwjrsDb = destCouchServer.connectDatabase(destDbBase + "/fwjrs")
# A view query limited to one row is enough to trigger view generation.
print "  Triggering view generation for jobs database..."
destJobsDb.loadView("JobDump", "statusByWorkflowName", options = {"limit": 1})
print "  Triggering view generation for fwjrs database..."
destFwjrsDb.loadView("FWJRDump", "outputByWorkflowName", options = {"limit": 1})

print ""
# Query destination DB for list of workflows
# URL templates for later %-formatting; "%%" yields a literal "%" so the
# URL-encoded characters survive the formatting step.
summaryBase = "%s/%s%%2Ffwjrs/_design/FWJRDump/_show/workflowSummary/%s" # dest host, dest db base, workflow name
# startkey decodes to ["<workflow>"] and endkey to ["<workflow>",{}]
successBase = "%s/%s%%2Fjobs/_design/JobDump/_list/successJobs/statusByWorkflowName?startkey=%%5B%%22%s%%22%%5D&endkey=%%5B%%22%s%%22%%2C%%7B%%7D%%5D&reduce=false" # dest host, dest db base, workflow, workflow
Esempio n. 9
0
class CMSCouchTest(unittest.TestCase):
    """
    Tests of the couch database wrapper against a live couch server.

    The server URL is read from the COUCHURL environment variable and every
    test runs against its own freshly created database.
    """
    test_counter = 0

    def setUp(self):
        """
        Connect to the server and (re)create the database for this test.
        """
        # Keep the URL so that tests building their own connection use the
        # same server (and the same default) as self.server
        self.couchUrl = os.getenv("COUCHURL",
                                  'http://*****:*****@localhost:5984')
        # Make an instance of the server
        self.server = CouchServer(self.couchUrl)
        self.testname = self.id().split('.')[-1]
        # Create a database, drop an existing one first
        dbname = 'cmscouch_unittest_%s' % self.testname.lower()

        if dbname in self.server.listDatabases():
            self.server.deleteDatabase(dbname)

        self.server.createDatabase(dbname)
        self.db = self.server.connectDatabase(dbname)

    def tearDown(self):
        """
        Delete the test database when no exception is being handled.
        """
        if sys.exc_info()[0] is None:
            # This test has passed, clean up after it
            dbname = 'cmscouch_unittest_%s' % self.testname.lower()
            self.server.deleteDatabase(dbname)

    def testCommitOne(self):
        """
        commitOne accepts both a plain dict and a Document.
        """
        # Can I commit one dict
        doc = {'foo': 123, 'bar': 456}
        self.assertTrue(self.db.commitOne(doc, returndocs=True)[0]['id'])
        # What about a Document
        doc = Document(inputDict=doc)
        self.assertTrue(self.db.commitOne(doc, returndocs=True)[0]['id'])

    def testCommitOneWithQueue(self):
        """
        CommitOne bypasses the queue, but it should maintain the queue if
        present for a future call to commit.
        """
        # Queue up five docs
        doc = {'foo': 123, 'bar': 456}
        for _ in range(5):
            self.db.queue(doc)
        # Commit one Document
        doc = Document(inputDict=doc)
        self.assertTrue(self.db.commitOne(doc, returndocs=True)[0]['id'])
        # only the directly committed document is stored so far
        self.assertEqual(1, len(self.db.allDocs()['rows']))
        self.db.commit()
        # the five queued documents have now been written as well
        self.assertEqual(6, len(self.db.allDocs()['rows']))

    def testTimeStamping(self):
        """
        A document committed with timestamp=True carries a 'timestamp' field.
        """
        doc = {'foo': 123, 'bar': 456}
        doc_id = self.db.commitOne(doc, timestamp=True,
                                   returndocs=True)[0]['id']
        doc = self.db.document(doc_id)
        self.assertTrue('timestamp' in doc)

    def testDeleteDoc(self):
        """
        delete_doc removes a document immediately.
        """
        doc = {'foo': 123, 'bar': 456}
        self.db.commitOne(doc)
        all_docs = self.db.allDocs()
        self.assertEqual(1, len(all_docs['rows']))

        # The db.delete_doc is immediate
        doc_id = all_docs['rows'][0]['id']
        self.db.delete_doc(doc_id)
        all_docs = self.db.allDocs()
        self.assertEqual(0, len(all_docs['rows']))

    def testDeleteQueuedDocs(self):
        """
        queueDelete only takes effect once commit is called.
        """
        doc1 = {'foo': 123, 'bar': 456}
        doc2 = {'foo': 789, 'bar': 101112}
        self.db.queue(doc1)
        self.db.queue(doc2)
        self.db.commit()

        all_docs = self.db.allDocs()
        self.assertEqual(2, len(all_docs['rows']))
        for res in all_docs['rows']:
            doc = self.db.document(res['id'])
            self.db.queueDelete(doc)
        # nothing is deleted before the commit
        all_docs = self.db.allDocs()
        self.assertEqual(2, len(all_docs['rows']))

        self.db.commit()

        all_docs = self.db.allDocs()
        self.assertEqual(0, len(all_docs['rows']))

    def testWriteReadDocNoID(self):
        """
        Placeholder: this test does not exercise anything yet.
        """
        # TODO: write and read back a document committed without an id
        pass

    def testReplicate(self):
        """
        A replicated document is identical in source and replica.
        """
        repl_db = self.server.connectDatabase(self.db.name + 'repl')

        doc_id = self.db.commitOne({'foo': 123},
                                   timestamp=True,
                                   returndocs=True)[0]['id']

        # replicate
        self.server.replicate(self.db.name, repl_db.name)

        self.assertEqual(self.db.document(doc_id), repl_db.document(doc_id))
        self.server.deleteDatabase(repl_db.name)

    def testSlashInDBName(self):
        """
        Slashes are a valid character in a database name, and are useful as it
        creates a directory structure for the couch data files.
        """
        db_name = 'wmcore/unittests'
        try:
            self.server.deleteDatabase(db_name)
        except CouchNotFoundError:
            # Ignore this - the database shouldn't already exist
            pass

        # the name must round-trip when the database is created ...
        db = self.server.createDatabase(db_name)
        self.assertEqual(db.info()['db_name'], db_name)

        # ... when it is connected to through the server ...
        db = self.server.connectDatabase(db_name)
        self.assertEqual(db.info()['db_name'], db_name)

        # ... and when a Database object is built directly
        db = Database(db_name, url=self.couchUrl)
        self.assertEqual(db.info()['db_name'], db_name)

        self.server.deleteDatabase(db_name)

    def testInvalidName(self):
        """
        Capital letters are not allowed in database names.
        """
        db_name = 'Not A Valid Name'
        self.assertRaises(ValueError, self.server.createDatabase, db_name)
        self.assertRaises(ValueError, self.server.deleteDatabase, db_name)
        self.assertRaises(ValueError, self.server.connectDatabase, db_name)
        self.assertRaises(ValueError, Database, db_name)

    def testDocumentSerialisation(self):
        """
        A document should be writable into the couchdb with a timestamp.
        """
        d = Document()
        d['foo'] = 'bar'
        doc_info = self.db.commit(doc=d, timestamp=True)[0]
        d_from_db = self.db.document(doc_info['id'])
        self.assertEqual(d['foo'], d_from_db['foo'])
        self.assertEqual(d['timestamp'], d_from_db['timestamp'])

    def testAttachments(self):
        """
        Test uploading attachments with and without checksumming
        """
        doc = self.db.commitOne({'foo': 'bar'},
                                timestamp=True,
                                returndocs=True)[0]
        attachment1 = "Hello"
        attachment2 = "How are you today?"
        attachment3 = "I'm very well, thanks for asking"
        attachment4 = "Lovely weather we're having"
        attachment5 = "Goodbye"
        # base64 encoded md5 digest of attachment5, handed over as its checksum
        keyhash = hashlib.md5()
        keyhash.update(attachment5)
        attachment5_md5 = base64.b64encode(keyhash.digest())
        attachment6 = "Good day to you, sir!"
        #TODO: add a binary attachment - e.g. tar.gz
        # every call returns the new id/rev pair that the next call needs
        doc = self.db.addAttachment(doc['id'], doc['rev'], attachment1)
        doc = self.db.addAttachment(doc['id'],
                                    doc['rev'],
                                    attachment2,
                                    contentType="foo/bar")
        doc = self.db.addAttachment(doc['id'],
                                    doc['rev'],
                                    attachment3,
                                    name="my_greeting")
        doc = self.db.addAttachment(doc['id'],
                                    doc['rev'],
                                    attachment4,
                                    add_checksum=True)
        doc = self.db.addAttachment(doc['id'],
                                    doc['rev'],
                                    attachment5,
                                    checksum=attachment5_md5)

        # a checksum that does not match the content must be rejected
        self.assertRaises(CouchInternalServerError,
                          self.db.addAttachment,
                          doc['id'],
                          doc['rev'],
                          attachment6,
                          checksum='123')

    def testRevisionHandling(self):
        """
        Create a conflict through replication, resolve it, and check which
        revisions of a document can still be found before and after compaction.
        """
        # This test won't work from an existing database, conflicts will be preserved, so
        # ruthlessly remove the databases to get a clean slate.
        for stale_name in (self.db.name, self.db.name + 'repl'):
            try:
                self.server.deleteDatabase(stale_name)
            except CouchNotFoundError:
                pass  # Must have been deleted already

        # I'm going to create a conflict, so need a replica db
        self.db = self.server.connectDatabase(self.db.name)
        repl_db = self.server.connectDatabase(self.db.name + 'repl')

        doc_id = self.db.commitOne({'foo': 123},
                                   timestamp=True,
                                   returndocs=True)[0]['id']
        doc_v1 = self.db.document(doc_id)

        # replicate
        self.server.replicate(self.db.name, repl_db.name)

        # second revision on the source database
        doc_v2 = self.db.document(doc_id)
        doc_v2['bar'] = 456
        self.db.commitOne(doc_v2, returndocs=True)
        doc_v2 = self.db.document(doc_id)

        # now update the replica with a different value for the same field
        conflict_doc = repl_db.document(doc_id)
        conflict_doc['bar'] = 101112
        repl_db.commitOne(conflict_doc)

        # replicate, creating the conflict
        self.server.replicate(self.db.name, repl_db.name)
        # temporary view emitting only documents that carry a _conflicts list
        conflict_view = {
            'map':
            "function(doc) {if(doc._conflicts) {emit(doc._conflicts, null);}}"
        }
        repl_view_url = '/%s/_temp_view' % repl_db.name
        data = repl_db.post(repl_view_url, conflict_view)

        # Should have one conflict in the repl database
        self.assertEqual(data['total_rows'], 1)
        # Should have no conflicts in the source database
        self.assertEqual(
            self.db.post('/%s/_temp_view' % self.db.name,
                         conflict_view)['total_rows'], 0)

        # the view key is the _conflicts list, so its first entry is the losing revision
        conflict_id = data['rows'][0]['id']
        conflict_rev = data['rows'][0]['key'][0]
        self.assertTrue(repl_db.documentExists(conflict_id, rev=conflict_rev))

        # deleting the conflicting revision resolves the conflict
        repl_db.delete_doc(conflict_id, rev=conflict_rev)
        data = repl_db.post(repl_view_url, conflict_view)

        self.assertEqual(data['total_rows'], 0)
        self.server.deleteDatabase(repl_db.name)

        # update it again to get a third revision on the source
        doc_v3 = self.db.document(doc_id)
        doc_v3['baz'] = 789
        self.db.commitOne(doc_v3, returndocs=True)
        doc_v3 = self.db.document(doc_id)

        # test that I can pull out an old revision
        doc_v1_test = self.db.document(doc_id, rev=doc_v1['_rev'])
        self.assertEqual(doc_v1, doc_v1_test)

        # test that I can check a revision exists
        self.assertTrue(self.db.documentExists(doc_id, rev=doc_v2['_rev']))

        # a made-up revision string must not be reported as existing
        self.assertFalse(
            self.db.documentExists(doc_id, rev='1' + doc_v2['_rev']))

        # why you shouldn't rely on rev: after compaction only the latest one is found
        self.db.compact(blocking=True)
        self.assertFalse(self.db.documentExists(doc_id, rev=doc_v1['_rev']))
        self.assertFalse(self.db.documentExists(doc_id, rev=doc_v2['_rev']))
        self.assertTrue(self.db.documentExists(doc_id, rev=doc_v3['_rev']))

    def testCommit(self):
        """
        Test queue and commit modes: plain commit, commit of two documents
        sharing an id, all_or_nothing commit and commit with a conflict callback.
        """
        # try to commit 2 random docs
        doc = {'foo': 123, 'bar': 456}
        self.db.queue(doc)
        self.db.queue(doc)
        self.assertEqual(2, len(self.db.commit()))

        # committing 2 docs with the same id will fail
        self.db.queue(Document(id="1", inputDict={'foo': 123, 'bar': 456}))
        self.db.queue(Document(id="1", inputDict={'foo': 1234, 'bar': 456}))
        answer = self.db.commit()
        self.assertEqual(2, len(answer))
        self.assertEqual(answer[0]['error'], 'conflict')
        self.assertEqual(answer[1]['error'], 'conflict')

        # all_or_nothing mode ignores conflicts
        self.db.queue(Document(id="2", inputDict=doc))
        self.db.queue(Document(id="2", inputDict={'foo': 1234, 'bar': 456}))
        answer = self.db.commit(all_or_nothing=True)
        self.assertEqual(2, len(answer))
        # neither of the two results may carry an error
        # (the second check used to re-test answer[0] by mistake)
        self.assertEqual(answer[0].get('error'), None)
        self.assertEqual(answer[1].get('error'), None)
        self.assertEqual(answer[0]['id'], '2')
        self.assertEqual(answer[1]['id'], '2')

        # callbacks can do stuff when conflicts arise
        # this particular one just overwrites the document
        def callback(db, data, result):
            # re-commit the queued document matching the reported id,
            # using the revision currently stored in the database
            for queued in data['docs']:
                if queued['_id'] == result['id']:
                    queued['_rev'] = db.document(queued['_id'])['_rev']
                    retval = db.commitOne(queued)
            return retval[0]

        self.db.queue(Document(id="2", inputDict={'foo': 5, 'bar': 6}))
        answer = self.db.commit(callback=callback)
        self.assertEqual(1, len(answer))
        self.assertEqual(answer[0].get('error'), None)
        updatedDoc = self.db.document('2')
        self.assertEqual(updatedDoc['foo'], 5)
        self.assertEqual(updatedDoc['bar'], 6)

    def testUpdateHandler(self):
        """
        Test that update function support works
        """
        # the handler increments doc.counter and answers with a fixed string
        update_ddoc = {
            '_id': '_design/foo',
            'language': 'javascript',
            'updates': {
                "bump-counter":
                'function(doc, req) {if (!doc.counter) {doc.counter = 0};doc.counter += 1;return [doc,"bumped it!"];}',
            }
        }
        self.db.commit(update_ddoc)
        doc = {'foo': 123, 'counter': 0}
        doc_id = self.db.commit(doc)[0]['id']
        self.assertEqual("bumped it!",
                         self.db.updateDocument(doc_id, 'foo', 'bump-counter'))

        self.assertEqual(1, self.db.document(doc_id)['counter'])

    def testList(self):
        """
        Test list function works ok
        """
        list_ddoc = {
            '_id': '_design/foo',
            'language': 'javascript',
            'views': {
                'all': {
                    'map': 'function(doc) {emit(null, null) }'
                },
            },
            'lists': {
                'errorinoutput':
                'function(doc, req) {send("A string with the word error in")}',
                'malformed': 'function(doc, req) {somethingtoraiseanerror}',
            }
        }
        self.db.commit(list_ddoc)
        # appropriate errors raised
        self.assertRaises(CouchNotFoundError, self.db.loadList, 'foo', 'error',
                          'view_doesnt_exist')
        self.assertRaises(CouchInternalServerError, self.db.loadList, 'foo',
                          'malformed', 'all')
        # error in list output string shouldn't raise an error
        self.assertEqual(self.db.loadList('foo', 'errorinoutput', 'all'),
                         "A string with the word error in")

    def testAllDocs(self):
        """
        Test AllDocs with options
        """
        self.db.queue(Document(id="1", inputDict={'foo': 123, 'bar': 456}))
        self.db.queue(Document(id="2", inputDict={'foo': 123, 'bar': 456}))
        self.db.queue(Document(id="3", inputDict={'foo': 123, 'bar': 456}))

        self.db.commit()
        self.assertEqual(3, len(self.db.allDocs()['rows']))
        self.assertEqual(2, len(self.db.allDocs({'startkey': "2"})['rows']))
        self.assertEqual(2, len(self.db.allDocs(keys=["1", "3"])['rows']))
        self.assertEqual(
            1, len(self.db.allDocs({'limit': 1}, ["1", "3"])['rows']))
        # asking for a key that was never stored gives a row carrying 'error'
        self.assertTrue('error' in self.db.allDocs(keys=["1", "4"])['rows'][1])
Esempio n. 10
0
class CMSCouchTest(unittest.TestCase):
    """
    Tests for the CouchServer / Database wrappers.

    Every test talks to the CouchDB server given by the COUCHURL environment
    variable and works on its own database named after the test method.
    """
    test_counter = 0

    def setUp(self):
        """Connect to the server and (re)create the per-test database."""
        # Make an instance of the server
        self.server = CouchServer(os.getenv("COUCHURL", 'http://*****:*****@localhost:5984'))
        self.testname = self.id().split('.')[-1]
        # Create a database, drop an existing one first
        self.dbname = 'cmscouch_unittest_%s' % self.testname.lower()

        if self.dbname in self.server.listDatabases():
            self.server.deleteDatabase(self.dbname)

        self.server.createDatabase(self.dbname)
        self.db = self.server.connectDatabase(self.dbname)

    def tearDown(self):
        """Drop the per-test database when no exception is being handled."""
        if sys.exc_info()[0] is None:
            # This test has passed, clean up after it
            self.server.deleteDatabase(self.dbname)

    def testCommitOne(self):
        """commitOne accepts both a plain dict and a Document."""
        # Can I commit one dict
        doc = {'foo': 123, 'bar': 456}
        doc_id = self.db.commitOne(doc, returndocs=True)[0]['id']
        self.assertTrue(self.db.documentExists(doc_id))
        # What about a Document
        doc = Document(inputDict=doc)
        doc_id = self.db.commitOne(doc, returndocs=True)[0]['id']
        self.assertTrue(self.db.documentExists(doc_id))

    def testCommitOneWithQueue(self):
        """
        CommitOne bypasses the queue, but it should maintain the queue if
        present for a future call to commit.
        """
        # Queue up five docs
        doc = {'foo': 123, 'bar': 456}
        for _ in range(5):
            self.db.queue(doc)
        # Commit one Document
        doc = Document(inputDict=doc)
        self.db.commitOne(doc, returndocs=True)
        self.assertEqual(1, len(self.db.allDocs()['rows']))
        self.db.commit()
        self.assertEqual(6, len(self.db.allDocs()['rows']))

    def testTimeStamping(self):
        """commitOne(timestamp=True) stores a 'timestamp' field."""
        doc = {'foo': 123, 'bar': 456}
        doc_id = self.db.commitOne(doc, timestamp=True, returndocs=True)[0]['id']
        doc = self.db.document(doc_id)
        self.assertIn('timestamp', doc.keys())

    def testDeleteDoc(self):
        """delete_doc removes a document immediately."""
        doc = {'foo': 123, 'bar': 456}
        self.db.commitOne(doc)
        all_docs = self.db.allDocs()
        self.assertEqual(1, len(all_docs['rows']))

        # The db.delete_doc is immediate
        doc_id = all_docs['rows'][0]['id']
        self.db.delete_doc(doc_id)
        all_docs = self.db.allDocs()
        self.assertEqual(0, len(all_docs['rows']))

    def testDeleteQueuedDocs(self):
        """queueDelete only takes effect on the next commit."""
        doc1 = {'foo': 123, 'bar': 456}
        doc2 = {'foo': 789, 'bar': 101112}
        self.db.queue(doc1)
        self.db.queue(doc2)
        self.db.commit()

        all_docs = self.db.allDocs()
        self.assertEqual(2, len(all_docs['rows']))
        for res in all_docs['rows']:
            doc = self.db.document(res['id'])
            self.db.queueDelete(doc)
        # nothing is deleted until the queue is committed
        all_docs = self.db.allDocs()
        self.assertEqual(2, len(all_docs['rows']))

        self.db.commit()

        all_docs = self.db.allDocs()
        self.assertEqual(0, len(all_docs['rows']))

    def testWriteReadDocNoID(self):
        """Placeholder: this test was never implemented and asserts nothing."""
        # TODO: write/read a document that has no explicit id

    def testReplicate(self):
        """A replicated document is identical in source and target."""
        repl_db = self.server.connectDatabase(self.db.name + 'repl')

        doc_id = self.db.commitOne({'foo': 123}, timestamp=True, returndocs=True)[0]['id']

        # replicate
        self.server.replicate(self.db.name, repl_db.name)

        self.assertEqual(self.db.document(doc_id), repl_db.document(doc_id))
        self.server.deleteDatabase(repl_db.name)

    def testSlashInDBName(self):
        """
        Slashes are a valid character in a database name, and are useful as it
        creates a directory structure for the couch data files.
        """
        db_name = 'wmcore/unittests'
        try:
            self.server.deleteDatabase(db_name)
        except Exception:
            # Best-effort cleanup - the database shouldn't already exist
            pass

        db = self.server.createDatabase(db_name)
        self.assertEqual(db.info()['db_name'], db_name)

        db = self.server.connectDatabase(db_name)
        self.assertEqual(db.info()['db_name'], db_name)

        db = Database(db_name)
        self.assertEqual(db.info()['db_name'], db_name)

        self.server.deleteDatabase(db_name)

    def testInvalidName(self):
        """
        Capital letters are not allowed in database names.
        """
        db_name = 'Not A Valid Name'
        self.assertRaises(ValueError, self.server.createDatabase, db_name)
        self.assertRaises(ValueError, self.server.deleteDatabase, db_name)
        self.assertRaises(ValueError, self.server.connectDatabase, db_name)
        self.assertRaises(ValueError, Database, db_name)

    def testDocumentSerialisation(self):
        """
        A document should be writable into the couchdb with a timestamp.
        """
        d = Document()
        d['foo'] = 'bar'
        doc_info = self.db.commit(doc=d, timestamp=True)[0]
        d_from_db = self.db.document(doc_info['id'])
        self.assertEqual(d['foo'], d_from_db['foo'])
        self.assertEqual(d['timestamp'], d_from_db['timestamp'])

    def testAttachments(self):
        """
        Test uploading attachments with and without checksumming
        """
        doc = self.db.commitOne({'foo': 'bar'}, timestamp=True, returndocs=True)[0]
        attachment1 = "Hello"
        attachment2 = "How are you today?"
        attachment3 = "I'm very well, thanks for asking"
        attachment4 = "Lovely weather we're having"
        attachment5 = "Goodbye"
        # hashlib only accepts bytes on Python 3, so encode before hashing
        keyhash = hashlib.md5()
        keyhash.update(attachment5.encode('utf-8'))
        attachment5_md5 = base64.b64encode(keyhash.digest())
        attachment6 = "Good day to you, sir!"
        # TODO: add a binary attachment - e.g. tar.gz
        doc = self.db.addAttachment(doc['id'], doc['rev'], attachment1)
        doc = self.db.addAttachment(doc['id'], doc['rev'], attachment2, contentType="foo/bar")
        doc = self.db.addAttachment(doc['id'], doc['rev'], attachment3, name="my_greeting")
        doc = self.db.addAttachment(doc['id'], doc['rev'], attachment4, add_checksum=True)
        doc = self.db.addAttachment(doc['id'], doc['rev'], attachment5, checksum=attachment5_md5)

        # a checksum that does not match the payload must be rejected
        self.assertRaises(CouchInternalServerError, self.db.addAttachment,
                          doc['id'], doc['rev'], attachment6, checksum='123')

    def testRevisionHandling(self):
        """Create a replication conflict, resolve it and check old revisions."""
        # This test won't work from an existing database, conflicts will be preserved, so
        # ruthlessly remove the databases to get a clean slate.
        try:
            self.server.deleteDatabase(self.db.name)
        except CouchNotFoundError:
            pass  # Must have been deleted already

        try:
            self.server.deleteDatabase(self.db.name + 'repl')
        except CouchNotFoundError:
            pass  # Must have been deleted already

        # I'm going to create a conflict, so need a replica db
        self.db = self.server.connectDatabase(self.db.name)
        repl_db = self.server.connectDatabase(self.db.name + 'repl')

        doc_id = self.db.commitOne({'foo': 123}, timestamp=True, returndocs=True)[0]['id']
        doc_v1 = self.db.document(doc_id)

        # replicate
        self.server.replicate(self.db.name, repl_db.name)

        doc_v2 = self.db.document(doc_id)
        doc_v2['bar'] = 456
        self.db.commitOne(doc_v2, returndocs=True)
        doc_v2 = self.db.document(doc_id)

        # now update the replica
        conflict_doc = repl_db.document(doc_id)
        conflict_doc['bar'] = 101112
        repl_db.commitOne(conflict_doc)

        # replicate, creating the conflict
        self.server.replicate(self.db.name, repl_db.name)
        conflict_view = {'map': "function(doc) {if(doc._conflicts) {emit(doc._conflicts, null);}}"}
        data = repl_db.post('/%s/_temp_view' % repl_db.name, conflict_view)

        # Should have one conflict in the repl database
        self.assertEqual(data['total_rows'], 1)
        # Should have no conflicts in the source database
        self.assertEqual(self.db.post('/%s/_temp_view' % self.db.name, conflict_view)['total_rows'], 0)
        self.assertTrue(repl_db.documentExists(data['rows'][0]['id'], rev=data['rows'][0]['key'][0]))

        repl_db.delete_doc(data['rows'][0]['id'], rev=data['rows'][0]['key'][0])
        data = repl_db.post('/%s/_temp_view' % repl_db.name, conflict_view)

        self.assertEqual(data['total_rows'], 0)
        self.server.deleteDatabase(repl_db.name)

        # update it again
        doc_v3 = self.db.document(doc_id)
        doc_v3['baz'] = 789
        self.db.commitOne(doc_v3, returndocs=True)
        doc_v3 = self.db.document(doc_id)

        # test that I can pull out an old revision
        doc_v1_test = self.db.document(doc_id, rev=doc_v1['_rev'])
        self.assertEqual(doc_v1, doc_v1_test)

        # test that I can check a revision exists
        self.assertTrue(self.db.documentExists(doc_id, rev=doc_v2['_rev']))

        self.assertFalse(self.db.documentExists(doc_id, rev='1' + doc_v2['_rev']))

        # why you shouldn't rely on rev
        self.db.compact(blocking=True)
        self.assertFalse(self.db.documentExists(doc_id, rev=doc_v1['_rev']))
        self.assertFalse(self.db.documentExists(doc_id, rev=doc_v2['_rev']))
        self.assertTrue(self.db.documentExists(doc_id, rev=doc_v3['_rev']))

    def testCommit(self):
        """
        Test queue and commit modes
        """
        # try to commit 2 random docs
        doc = {'foo': 123, 'bar': 456}
        self.db.queue(doc)
        self.db.queue(doc)
        self.assertEqual(2, len(self.db.commit()))

        # committing 2 docs with the same id will fail
        self.db.queue(Document(id="1", inputDict={'foo': 123, 'bar': 456}))
        self.db.queue(Document(id="1", inputDict={'foo': 1234, 'bar': 456}))
        answer = self.db.commit()
        self.assertEqual(2, len(answer))
        self.assertEqual(answer[0]['error'], 'conflict')
        self.assertEqual(answer[1]['error'], 'conflict')

        # all_or_nothing mode ignores conflicts
        self.db.queue(Document(id="2", inputDict=doc))
        self.db.queue(Document(id="2", inputDict={'foo': 1234, 'bar': 456}))
        answer = self.db.commit(all_or_nothing=True)
        self.assertEqual(2, len(answer))
        # check both replies (the second one used to be left unchecked)
        self.assertEqual(answer[0].get('error'), None)
        self.assertEqual(answer[1].get('error'), None)
        self.assertEqual(answer[0]['id'], '2')
        self.assertEqual(answer[1]['id'], '2')

    def testUpdateHandler(self):
        """
        Test that update function support works
        """

        update_ddoc = {
            '_id': '_design/foo',
            'language': 'javascript',
            'updates': {
                "bump-counter": 'function(doc, req) {if (!doc.counter) {doc.counter = 0};doc.counter += 1;return [doc,"bumped it!"];}',
            }
        }
        self.db.commit(update_ddoc)
        doc = {'foo': 123, 'counter': 0}
        doc_id = self.db.commit(doc)[0]['id']
        self.assertEqual("bumped it!", self.db.updateDocument(doc_id, 'foo', 'bump-counter'))

        self.assertEqual(1, self.db.document(doc_id)['counter'])
Esempio n. 11
0
class WorkQueueBackend(object):
    """
    Represents persistent storage for WorkQueue
    """
    def __init__(self,
                 db_url,
                 db_name='workqueue',
                 inbox_name=None,
                 parentQueue=None,
                 queueUrl=None,
                 logger=None):
        """
        Connect to the queue and inbox databases on the couch server at db_url.

        :param db_url: couch server url; kept unchanged as hostWithAuth
        :param db_name: name of the element database
        :param inbox_name: name of the inbox database, "<db_name>_inbox" when None
        :param parentQueue: parent queue url, or a false value when there is no parent
        :param queueUrl: url of this queue, "<db_url>/<db_name>" when not given
        :param logger: logger to use; the logging module itself when not given
        """
        if not logger:
            import logging
            logger = logging
        self.logger = logger

        if inbox_name is None:
            inbox_name = "%s_inbox" % db_name

        self.server = CouchServer(db_url)
        # keep both the raw parent url and its sanitized form
        self.parentCouchUrlWithAuth = parentQueue
        self.parentCouchUrl = sanitizeURL(parentQueue)['url'] if parentQueue else None
        self.db = self.server.connectDatabase(db_name, create=False, size=10000)
        self.hostWithAuth = db_url
        self.inbox = self.server.connectDatabase(inbox_name, create=False, size=10000)
        ownUrl = queueUrl or (db_url + '/' + db_name)
        self.queueUrl = sanitizeURL(ownUrl)['url']
        # key under which element parameters are sent in bulk updates
        self.eleKey = 'WMCore.WorkQueue.DataStructs.WorkQueueElement.WorkQueueElement'

    def forceQueueSync(self):
        """Force a blocking replication - used only in tests"""
        self.pullFromParent(continuous=False)
        self.sendToParent(continuous=False)

    def pullFromParent(self, continuous=True, cancel=False):
        """Replicate from parent couch - blocking: used only int test"""
        try:
            if self.parentCouchUrl and self.queueUrl:
                self.logger.info(
                    "Forcing pullFromParent from parentCouch: %s to queueUrl %s/%s",
                    self.parentCouchUrl, self.queueUrl, self.inbox.name)
                self.server.replicate(source=self.parentCouchUrl,
                                      destination="%s/%s" %
                                      (self.hostWithAuth, self.inbox.name),
                                      filter='WorkQueue/queueFilter',
                                      query_params={
                                          'childUrl': self.queueUrl,
                                          'parentUrl': self.parentCouchUrl
                                      },
                                      continuous=continuous,
                                      cancel=cancel)
        except Exception as ex:
            self.logger.warning('Replication from %s failed: %s' %
                                (self.parentCouchUrl, str(ex)))

    def sendToParent(self, continuous=True, cancel=False):
        """Replicate to parent couch - blocking: used only int test"""
        try:
            if self.parentCouchUrl and self.queueUrl:
                self.logger.info(
                    "Forcing sendToParent from queueUrl %s/%s to parentCouch: %s",
                    self.queueUrl, self.inbox.name, self.parentCouchUrl)
                self.server.replicate(source="%s" % self.inbox.name,
                                      destination=self.parentCouchUrlWithAuth,
                                      filter='WorkQueue/queueFilter',
                                      query_params={
                                          'childUrl': self.queueUrl,
                                          'parentUrl': self.parentCouchUrl
                                      },
                                      continuous=continuous,
                                      cancel=cancel)
        except Exception as ex:
            self.logger.warning('Replication to %s failed: %s' %
                                (self.parentCouchUrl, str(ex)))

    def getElementsForSplitting(self):
        """Returns the elements from the inbox that need to be split,
        if WorkflowName specified only return elements to split for that workflow"""
        elements = self.getInboxElements(status='Negotiating')
        specs = {}  # cache as may have multiple elements for same spec
        for ele in elements:
            if ele['RequestName'] not in specs:
                wmspec = WMWorkloadHelper()
                wmspec.load(self.parentCouchUrlWithAuth +
                            "/%s/spec" % ele['RequestName'])
                specs[ele['RequestName']] = wmspec
            ele['WMSpec'] = specs[ele['RequestName']]
        del specs
        return elements

    def insertWMSpec(self, wmspec):
        """
        Insert WMSpec to backend
        """
        # Can't save spec to inbox, it needs to be visible to child queues
        # Can't save empty dict so add dummy variable
        dummyValues = {'name': wmspec.name()}
        # change specUrl in spec before saving (otherwise it points to previous url)
        wmspec.setSpecUrl(self.db['host'] + "/%s/%s/spec" %
                          (self.db.name, wmspec.name()))
        return wmspec.saveCouch(self.hostWithAuth, self.db.name, dummyValues)

    def getWMSpec(self, name):
        """Load and return the spec stored under the given name in the element database"""
        helper = WMWorkloadHelper()
        specUrl = self.db['host'] + "/%s/%s/spec" % (self.db.name, name)
        helper.load(specUrl)
        return helper

    def insertElements(self, units, parent=None):
        """
        Insert element to database

        @param parent is the parent WorkQueueObject these element's belong to.
                                            i.e. a workflow which has been split
        """
        if not units:
            return []
        # store spec file separately - assume all elements share same spec
        self.insertWMSpec(units[0]['WMSpec'])
        newUnitsInserted = []
        for unit in units:

            # cast to couch
            if not isinstance(unit, CouchWorkQueueElement):
                unit = CouchWorkQueueElement(self.db, elementParams=dict(unit))

            if parent:
                unit['ParentQueueId'] = parent.id
                unit['TeamName'] = parent['TeamName']
                unit['WMBSUrl'] = parent['WMBSUrl']

            if unit._couch.documentExists(unit.id):
                self.logger.info(
                    'Element "%s" already exists, skip insertion.' % unit.id)
                continue
            else:
                newUnitsInserted.append(unit)
            unit.save()
            unit._couch.commit(all_or_nothing=True)

        return newUnitsInserted

    def createWork(self, spec, **kwargs):
        """Build the Inbox element for this spec and return it.

        The element gets the spec name as id. It is not persisted to the
        database here.
        """
        elementParams = kwargs
        elementParams['WMSpec'] = spec
        elementParams['RequestName'] = spec.name()
        elementParams['StartPolicy'] = spec.startPolicyParameters()
        elementParams['EndPolicy'] = spec.endPolicyParameters()
        elementParams['OpenForNewData'] = False

        inboxElement = CouchWorkQueueElement(self.inbox,
                                             elementParams=elementParams)
        inboxElement.id = spec.name()
        return inboxElement

    def getElements(self,
                    status=None,
                    elementIDs=None,
                    returnIdOnly=False,
                    db=None,
                    loadSpec=False,
                    WorkflowName=None,
                    **elementFilters):
        """Return elements that match requirements

        status, elementIDs & filters are 'AND'ed together to filter elements.
        returnIdOnly causes the element not to be loaded and only the id returned
        db is used to specify which database to return from
        loadSpec causes the workflow for each spec to be loaded.
        WorkflowName may be used in the place of RequestName
        """
        key = []
        if not db:
            db = self.db
        if elementFilters.get('RequestName') and not WorkflowName:
            WorkflowName = elementFilters.pop('RequestName')

        if elementIDs:
            if elementFilters or status or returnIdOnly:
                msg = "Can't specify extra filters (or return id's) when using element id's with getElements()"
                raise ValueError(msg)
            elements = [
                CouchWorkQueueElement(db, i).load() for i in elementIDs
            ]
        else:
            options = {
                'include_docs': True,
                'filter': elementFilters,
                'idOnly': returnIdOnly,
                'reduce': False
            }
            # filter on workflow or status if possible
            filterName = 'elementsByWorkflow'
            if WorkflowName:
                key.append(WorkflowName)
            elif status:
                filterName = 'elementsByStatus'
                key.append(status)
            elif elementFilters.get('SubscriptionId'):
                key.append(elementFilters['SubscriptionId'])
                filterName = 'elementsBySubscription'
            # add given params to filters
            if status:
                options['filter']['Status'] = status
            if WorkflowName:
                options['filter']['RequestName'] = WorkflowName

            view = db.loadList('WorkQueue', 'filter', filterName, options, key)
            view = json.loads(view)
            if returnIdOnly:
                return view
            elements = [
                CouchWorkQueueElement.fromDocument(db, row) for row in view
            ]

        if loadSpec:
            specs = {}  # cache as may have multiple elements for same spec
            for ele in elements:
                if ele['RequestName'] not in specs:
                    wmspec = self.getWMSpec(ele['RequestName'])
                    specs[ele['RequestName']] = wmspec
                ele['WMSpec'] = specs[ele['RequestName']]
            del specs
        return elements

    def getInboxElements(self, *args, **kwargs):
        """
        Return elements from Inbox, supports same semantics as getElements()
        """
        return self.getElements(*args, db=self.inbox, **kwargs)

    def getElementsForWorkflow(self, workflow):
        """Get elements for a workflow"""
        elements = self.db.loadView('WorkQueue', 'elementsByWorkflow', {
            'key': workflow,
            'include_docs': True,
            'reduce': False
        })
        return [
            CouchWorkQueueElement.fromDocument(self.db, x['doc'])
            for x in elements.get('rows', [])
        ]

    def getElementsForParent(self, parent):
        """Get elements with the given parent"""
        elements = self.db.loadView('WorkQueue', 'elementsByParent', {
            'key': parent.id,
            'include_docs': True
        })
        return [
            CouchWorkQueueElement.fromDocument(self.db, x['doc'])
            for x in elements.get('rows', [])
        ]

    def saveElements(self, *elements):
        """Persist elements

        Returns elements successfully saved, user must verify to catch errors
        """
        result = []
        if not elements:
            return result
        for element in elements:
            element.save()
        answer = elements[0]._couch.commit()
        result, failures = formatReply(answer, *elements)
        msg = 'Couch error saving element: "%s", error "%s", reason "%s"'
        for failed in failures:
            self.logger.error(
                msg % (failed['id'], failed['error'], failed['reason']))
        return result

    def _raiseConflictErrorAndLog(self,
                                  conflictIDs,
                                  updatedParams,
                                  dbName="workqueue"):
        """Log the conflicting ids and parameters as an error, then raise
        WorkQueueError carrying the same message"""
        template = "Need to update this element manually from %s\n ids:%s\n, parameters:%s\n"
        errorMsg = template % (dbName, conflictIDs, updatedParams)
        self.logger.error(errorMsg)
        raise WorkQueueError(errorMsg)

    def updateElements(self, *elementIds, **updatedParams):
        """Update given element's (identified by id) with new parameters"""
        if not elementIds:
            return
        eleParams = {}
        eleParams[self.eleKey] = updatedParams
        conflictIDs = self.db.updateBulkDocumentsWithConflictHandle(
            elementIds, eleParams)
        if conflictIDs:
            self._raiseConflictErrorAndLog(conflictIDs, updatedParams)
        return

    def updateInboxElements(self, *elementIds, **updatedParams):
        """Update given inbox element's (identified by id) with new parameters"""
        if not elementIds:
            return
        eleParams = {}
        eleParams[self.eleKey] = updatedParams
        conflictIDs = self.inbox.updateBulkDocumentsWithConflictHandle(
            elementIds, eleParams)
        if conflictIDs:
            self._raiseConflictErrorAndLog(conflictIDs, updatedParams,
                                           "workqueue_inbox")
        return

    def deleteElements(self, *elements):
        """Delete elements"""
        if not elements:
            return
        specs = {}
        for i in elements:
            i.delete()
            specs[i['RequestName']] = None
        answer = elements[0]._couch.commit()
        _, failures = formatReply(answer, *elements)
        msg = 'Couch error deleting element: "%s", error "%s", reason "%s"'
        for failed in failures:
            # only count delete as failed if document still exists
            if elements[0]._couch.documentExists(failed['id']):
                self.logger.error(
                    msg % (failed['id'], failed['error'], failed['reason']))
        # delete specs if no longer used
        for wf in specs:
            try:
                if not self.db.loadView('WorkQueue', 'elementsByWorkflow', {
                        'key': wf,
                        'limit': 1,
                        'reduce': False
                })['rows']:
                    self.db.delete_doc(wf)
            except CouchNotFoundError:
                pass

    def calculateAvailableWork(self, thresholds, siteJobCounts):
        """
        A short version of the `availableWork` method, which is used only to calculate
        the amount of work already available at the local workqueue.
        :param thresholds: a dictionary key'ed by the site name, values representing the
            maximum number of jobs allowed at that site.
        :param siteJobCounts: a dictionary-of-dictionaries key'ed by the site name; value
            is a dictionary with the number of jobs running at a given priority.
        :return: a tuple with the elements accepted and an overview of job counts per site
        """
        # NOTE: this method can be less verbose as well
        elements = []
        # If there are no sites, punt early.
        if not thresholds:
            self.logger.error("No thresholds is set: Please check")
            return elements, siteJobCounts

        self.logger.info("Calculating available work from queue %s",
                         self.queueUrl)

        options = {}
        options['include_docs'] = True
        options['descending'] = True
        options['resources'] = thresholds
        options['num_elem'] = 9999999  # magic number!
        result = self.db.loadList('WorkQueue', 'workRestrictions',
                                  'availableByPriority', options)
        result = json.loads(result)
        self.logger.info(
            "Retrieved %d elements from workRestrictions list for: %s",
            len(result), self.queueUrl)

        # Convert python dictionary into Couch WQE objects
        # And sort them by creation time and priority, such that highest priority and
        # oldest elements come first in the list
        sortedElements = []
        for item in result:
            element = CouchWorkQueueElement.fromDocument(self.db, item)
            sortedElements.append(element)
        sortAvailableElements(sortedElements)

        for element in sortedElements:
            commonSites = possibleSites(element)
            prio = element['Priority']
            # shuffle list of common sites all the time to give everyone the same chance
            random.shuffle(commonSites)
            possibleSite = None
            for site in commonSites:
                if site in thresholds:
                    # Count the number of jobs currently running of greater priority, if they
                    # are less than the site thresholds, then accept this element
                    curJobCount = sum([
                        x[1] if x[0] >= prio else 0
                        for x in viewitems(siteJobCounts.get(site, {}))
                    ])
                    self.logger.debug("Job Count: %s, site: %s thresholds: %s",
                                      curJobCount, site, thresholds[site])
                    if curJobCount < thresholds[site]:
                        possibleSite = site
                        break

            if possibleSite:
                self.logger.debug(
                    "Meant to accept workflow: %s, with prio: %s, element id: %s, for site: %s",
                    element['RequestName'], prio, element.id, possibleSite)
                elements.append(element)
                siteJobCounts.setdefault(possibleSite, {})
                siteJobCounts[possibleSite][prio] = siteJobCounts[possibleSite].setdefault(prio, 0) + \
                                                    element['Jobs'] * element.get('blowupFactor', 1.0)
            else:
                self.logger.debug(
                    "No available resources for %s with localdoc id %s",
                    element['RequestName'], element.id)

        self.logger.info(
            "And %d elements passed location and siteJobCounts restrictions for: %s",
            len(elements), self.queueUrl)
        return elements, siteJobCounts

    def availableWork(self,
                      thresholds,
                      siteJobCounts,
                      team=None,
                      excludeWorkflows=None,
                      numElems=9999999):
        """
        Get work - either from local or global queue - which is available to be run.

        :param thresholds: a dictionary key'ed by the site name, values representing the
            maximum number of jobs allowed at that site.
        :param siteJobCounts: a dictionary-of-dictionaries key'ed by the site name; value
            is a dictionary with the number of jobs running at a given priority.
        :param team: a string with the team name we want to pull work for
        :param excludeWorkflows: list of (aborted) workflows that should not be accepted
        :param numElems: integer with the maximum number of elements to be accepted (default
            to a very large number when pulling work from local queue, read unlimited)
        :return: a tuple with the elements accepted and an overview of job counts per site
        """
        excludeWorkflows = excludeWorkflows or []
        elements = []
        # If there are no sites, punt early.
        if not thresholds:
            self.logger.error("No thresholds is set: Please check")
            return elements, siteJobCounts

        self.logger.info("Current siteJobCounts:")
        for site, jobsByPrio in viewitems(siteJobCounts):
            self.logger.info("    %s : %s", site, jobsByPrio)

        self.logger.info("Getting up to %d available work from %s", numElems,
                         self.queueUrl)
        self.logger.info("  for team name: %s", team)
        self.logger.info("  with excludeWorkflows: %s", excludeWorkflows)
        self.logger.info("  for thresholds: %s", thresholds)

        # FIXME: magic numbers
        docsSliceSize = 1000
        options = {}
        options['include_docs'] = True
        options['descending'] = True
        options['resources'] = thresholds
        options['limit'] = docsSliceSize
        # FIXME: num_elem option can likely be deprecated, but it needs synchronization
        # between agents and global workqueue... for now, make sure it can return the slice size
        options['num_elem'] = docsSliceSize
        if team:
            options['team'] = team

        # Fetch workqueue elements in slices, using the CouchDB "limit" and "skip"
        # options for couch views. Conditions to stop this loop are:
        #  a) have a hard stop at 50k+1 (we might have to make this configurable)
        #  b) stop as soon as an empty slice is returned by Couch (thus all docs have
        #     already been retrieve)
        #  c) or, once "numElems" elements have been accepted
        numSkip = 0
        breakOut = False
        while True:
            if breakOut:
                # then we have reached the maximum number of elements to be accepted
                break
            self.logger.info("  with limit docs: %s, and skip first %s docs",
                             docsSliceSize, numSkip)
            options['skip'] = numSkip

            result = self.db.loadList('WorkQueue', 'workRestrictions',
                                      'availableByPriority', options)
            result = json.loads(result)
            if result:
                self.logger.info(
                    "Retrieved %d elements from workRestrictions list for: %s",
                    len(result), self.queueUrl)
            else:
                self.logger.info(
                    "All the workqueue elements have been exhausted for: %s ",
                    self.queueUrl)
                break
            # update number of documents to skip in the next cycle
            numSkip += docsSliceSize

            # Convert python dictionary into Couch WQE objects, skipping aborted workflows
            # And sort them by creation time and priority, such that highest priority and
            # oldest elements come first in the list
            sortedElements = []
            for i in result:
                element = CouchWorkQueueElement.fromDocument(self.db, i)
                # make sure not to acquire work for aborted or force-completed workflows
                if element['RequestName'] in excludeWorkflows:
                    msg = "Skipping aborted/force-completed workflow: %s, work id: %s"
                    self.logger.info(msg, element['RequestName'], element._id)
                else:
                    sortedElements.append(element)
            sortAvailableElements(sortedElements)

            for element in sortedElements:
                if numElems <= 0:
                    msg = "Reached maximum number of elements to be accepted, "
                    msg += "configured to: {}, from queue: {}".format(
                        len(elements), self.queueUrl)
                    self.logger.info(msg)
                    breakOut = True  # get out of the outer loop as well
                    break
                commonSites = possibleSites(element)
                prio = element['Priority']
                # shuffle list of common sites all the time to give everyone the same chance
                random.shuffle(commonSites)
                possibleSite = None
                for site in commonSites:
                    if site in thresholds:
                        # Count the number of jobs currently running of greater priority, if they
                        # are less than the site thresholds, then accept this element
                        curJobCount = sum([
                            x[1] if x[0] >= prio else 0
                            for x in viewitems(siteJobCounts.get(site, {}))
                        ])
                        self.logger.debug(
                            "Job Count: %s, site: %s thresholds: %s" %
                            (curJobCount, site, thresholds[site]))
                        if curJobCount < thresholds[site]:
                            possibleSite = site
                            break

                if possibleSite:
                    self.logger.info(
                        "Accepting workflow: %s, with prio: %s, element id: %s, for site: %s",
                        element['RequestName'], prio, element.id, possibleSite)
                    numElems -= 1
                    elements.append(element)
                    siteJobCounts.setdefault(possibleSite, {})
                    siteJobCounts[possibleSite][prio] = siteJobCounts[possibleSite].setdefault(prio, 0) + \
                                                        element['Jobs'] * element.get('blowupFactor', 1.0)
                else:
                    self.logger.debug(
                        "No available resources for %s with doc id %s",
                        element['RequestName'], element.id)

        self.logger.info(
            "And %d elements passed location and siteJobCounts restrictions for: %s",
            len(elements), self.queueUrl)
        return elements, siteJobCounts

    def getActiveData(self):
        """
        List the data items we currently have work in the queue for.

        Queries the grouped and reduced 'activeData' view of the main
        database.

        :return: one dict per view row, where 'dbs_url' is the first element
            of the row key and 'name' is the second one
        """
        viewOptions = {'reduce': True, 'group': True}
        view = self.db.loadView('WorkQueue', 'activeData', viewOptions)
        activeItems = []
        for row in view.get('rows', []):
            activeItems.append({'dbs_url': row['key'][0],
                                'name': row['key'][1]})
        return activeItems

    def getActiveParentData(self):
        """
        List the data items (with parent) we currently have work in the queue for.

        Queries the grouped and reduced 'activeParentData' view of the main
        database.

        :return: one dict per view row, where 'dbs_url' is the first element
            of the row key and 'name' is the second one
        """
        viewOptions = {'reduce': True, 'group': True}
        view = self.db.loadView('WorkQueue', 'activeParentData', viewOptions)
        activeItems = []
        for row in view.get('rows', []):
            activeItems.append({'dbs_url': row['key'][0],
                                'name': row['key'][1]})
        return activeItems

    def getActivePileupData(self):
        """
        List the data items (with pileup) we currently have work in the queue for.

        Queries the grouped and reduced 'activePileupData' view of the main
        database.

        :return: one dict per view row, where 'dbs_url' is the first element
            of the row key and 'name' is the second one
        """
        viewOptions = {'reduce': True, 'group': True}
        view = self.db.loadView('WorkQueue', 'activePileupData', viewOptions)
        activeItems = []
        for row in view.get('rows', []):
            activeItems.append({'dbs_url': row['key'][0],
                                'name': row['key'][1]})
        return activeItems

    def getElementsForData(self, data):
        """
        Get the active elements for this dbs & data combination.

        :param data: value used as the lookup key in the 'elementsByData' view
        :return: list with one CouchWorkQueueElement per returned row, built
            from the row document and bound to the main database
        """
        query = {'key': data, 'include_docs': True}
        view = self.db.loadView('WorkQueue', 'elementsByData', query)
        wrapped = []
        for row in view.get('rows', []):
            wrapped.append(
                CouchWorkQueueElement.fromDocument(self.db, row['doc']))
        return wrapped

    def getElementsForParentData(self, data):
        """
        Get the active elements for this (parent) data.

        :param data: value used as the lookup key in the
            'elementsByParentData' view
        :return: list with one CouchWorkQueueElement per returned row, built
            from the row document and bound to the main database
        """
        query = {'key': data, 'include_docs': True}
        view = self.db.loadView('WorkQueue', 'elementsByParentData', query)
        wrapped = []
        for row in view.get('rows', []):
            wrapped.append(
                CouchWorkQueueElement.fromDocument(self.db, row['doc']))
        return wrapped

    def getElementsForPileupData(self, data):
        """
        Get the active elements for this (pileup) data.

        :param data: value used as the lookup key in the
            'elementsByPileupData' view
        :return: list with one CouchWorkQueueElement per returned row, built
            from the row document and bound to the main database
        """
        query = {'key': data, 'include_docs': True}
        view = self.db.loadView('WorkQueue', 'elementsByPileupData', query)
        wrapped = []
        for row in view.get('rows', []):
            wrapped.append(
                CouchWorkQueueElement.fromDocument(self.db, row['doc']))
        return wrapped

    def isAvailable(self):
        """
        Tell whether the server is available, i.e. up and not compacting.

        :return: False when the database info reports 'compact_running', or
            when anything raised while checking (the error is logged);
            True otherwise
        """
        try:
            dbInfo = self.db.info()
            if dbInfo['compact_running']:
                self.logger.info("CouchDB compacting - try again later.")
                available = False
            else:
                available = True
        except Exception as exc:
            # any failure while probing the database counts as "unavailable"
            self.logger.error("CouchDB unavailable: %s" % str(exc))
            available = False
        return available

    def getWorkflows(self, includeInbox=False, includeSpecs=False):
        """
        Return the workflows known to workqueue.

        The row keys of the grouped 'elementsByWorkflow' view of the main
        database are always included.

        :param includeInbox: also include the keys of the same view in the inbox
        :param includeSpecs: also include the keys of the 'specsByWorkflow'
            view of the main database
        :return: list of distinct keys, in no particular order
        """
        elementRows = self.db.loadView('WorkQueue', 'elementsByWorkflow',
                                       {'group': True})['rows']
        workflows = {row['key'] for row in elementRows}
        if includeInbox:
            inboxRows = self.inbox.loadView('WorkQueue', 'elementsByWorkflow',
                                            {'group': True})['rows']
            workflows = workflows.union({row['key'] for row in inboxRows})
        if includeSpecs:
            specRows = self.db.loadView('WorkQueue', 'specsByWorkflow')['rows']
            workflows = workflows.union({row['key'] for row in specRows})
        return list(workflows)

    def queueLength(self):
        """
        Return the number of available elements.

        :return: the 'total_rows' value reported by the 'availableByPriority'
            view, which is queried with a row limit of 0
        """
        view = self.db.loadView('WorkQueue', 'availableByPriority',
                                {'limit': 0})
        return view['total_rows']

    def fixConflicts(self):
        """
        Fix elements in conflict.

        Each local queue runs this to resolve its conflicts with global,
        resolution propagates up to global.

        Conflicting elements are merged into one element with others deleted.

        This will fail if elements are modified during the resolution -
        if this happens rerun.

        The inbox is processed first, then the main database. Any exception
        raised while resolving one element is logged and does not stop the
        processing of the remaining elements.
        """
        for couchdb in (self.inbox, self.db):
            conflictRows = couchdb.loadView('WorkQueue', 'conflicts')['rows']
            for conflict in conflictRows:
                docId = conflict['id']
                try:
                    # the row value lists the revisions in conflict; load each one
                    revisions = []
                    for rev in conflict['value']:
                        revisions.append(
                            CouchWorkQueueElement.fromDocument(
                                couchdb, couchdb.document(docId, rev)))
                    resolved = fixElementConflicts(*revisions)
                    # save the first (merged) element; only when that is accepted
                    # save the remaining ones, i.e. delete the others
                    if self.saveElements(resolved[0]):
                        self.saveElements(*resolved[1:])
                except Exception as exc:
                    self.logger.error("Error resolving conflict for %s: %s" %
                                      (docId, str(exc)))

    def recordTaskActivity(self, taskname, comment=''):
        """
        Record a task for monitoring.

        Updates the entry for `taskname` inside the 'tasks' mapping of the
        'task_activity' document (a new document is started when the database
        does not have one) with the current time and the given comment, then
        commits the document. A failing commit is logged, not raised.

        :param taskname: name of the task to record
        :param comment: free text stored next to the timestamp
        """
        try:
            activity = self.db.document('task_activity')
        except CouchNotFoundError:
            activity = Document('task_activity')
        taskInfo = activity.setdefault('tasks', {}).setdefault(taskname, {})
        taskInfo['timestamp'] = time.time()
        taskInfo['comment'] = comment
        try:
            self.db.commitOne(activity)
        except Exception as exc:
            self.logger.error("Unable to update task %s freshness: %s" %
                              (taskname, str(exc)))

    def getWMBSInjectStatus(self, request=None):
        """
        This service only provided by global queue except on draining agent

        The injection status comes from the grouped and reduced
        'wmbsInjectStatusByRequest' view. A request is only reported as
        injected when, in addition, its first inbox element is not flagged
        'OpenForNewData'.

        :param request: workflow name; when not given all requests are reported
        :return: with `request`, the combined status of that request;
            without it, a list of single-entry dicts {request name: status}
        :raise WorkQueueNoMatchingElements: when `request` is given and the
            view has no row for it
        """
        def isOpenForNewData(workflow):
            inboxElements = self.getInboxElements(WorkflowName=workflow)
            if inboxElements:
                return inboxElements[0].get('OpenForNewData', False)
            return False

        viewOptions = {'group': True, 'reduce': True}
        if request:
            viewOptions.update(key=request)
        view = self.db.loadView('WorkQueue', 'wmbsInjectStatusByRequest',
                                viewOptions)

        if request:
            if not view['rows']:
                raise WorkQueueNoMatchingElements("%s not found" % request)
            injected = view['rows'][0]['value']
            stillOpen = isOpenForNewData(request)
            return injected and not stillOpen

        statusByRequest = {row['key']: row['value']
                           for row in view.get('rows', [])}
        report = []
        for workflow in statusByRequest:
            stillOpen = isOpenForNewData(workflow)
            report.append({workflow: statusByRequest[workflow] and not stillOpen})
        return report

    def getWorkflowNames(self, inboxFlag=False):
        """
        Get workflow names from workqueue db.

        :param inboxFlag: query the inbox database instead of the main one
        :return: list with the row keys of the grouped and reduced
            'elementsByWorkflow' view (queried with stale=update_after)
        """
        couchdb = self.inbox if inboxFlag else self.db
        viewOptions = {'stale': "update_after", 'reduce': True, 'group': True}
        view = couchdb.loadView('WorkQueue', 'elementsByWorkflow', viewOptions)
        names = []
        for row in view.get('rows', []):
            names.append(row['key'])
        return names

    def deleteWQElementsByWorkflow(self, workflowNames):
        """
        Delete the workqueue elements belonging to the given workflow names.

        It does not check the status of the workflows, so be careful when
        using it: pass only workflows which have reached an end status.

        Elements are looked up and deleted separately in the main database
        and in the inbox. Afterwards the document named after each workflow
        (the workflow with its spec) is deleted from the main database.

        :param workflowNames: a single workflow name or a list of them
        :return: number of element documents submitted for deletion, summed
            over both databases (documents are counted even when the bulk
            delete logged a not-found error)
        """
        if not isinstance(workflowNames, list):
            workflowNames = [workflowNames]
        if not workflowNames:
            return 0

        options = {"stale": "update_after", "reduce": False}
        deleted = 0
        for couchdb in (self.db, self.inbox):
            result = couchdb.loadView("WorkQueue", "elementsByWorkflow",
                                      options, workflowNames)
            # Group the ids per database: carrying over the ids found in the
            # previous database would re-submit them against this one and
            # count them twice in the returned total.
            idsByWflow = {}
            for entry in result["rows"]:
                idsByWflow.setdefault(entry['key'], []).append(entry['id'])
            for wflow, docIds in idsByWflow.items():
                self.logger.info(
                    "Going to delete %d documents in *%s* db for workflow: %s. Doc IDs: %s",
                    len(docIds), couchdb.name, wflow, docIds)
                try:
                    couchdb.bulkDeleteByIDs(docIds)
                except CouchNotFoundError as exc:
                    self.logger.error(
                        "Failed to find one of the documents. Error: %s",
                        str(exc))
                deleted += len(docIds)
        # delete the workflow with spec from workqueue db
        for wf in workflowNames:
            self.db.delete_doc(wf)
        return deleted
Esempio n. 12
0
# Base name of the destination databases: second command line argument.
destDbBase = sys.argv[2]

# NOTE(review): srcCouchHost, destCouchHost and srcDbBase are not defined in this
# fragment; presumably they are set earlier in the script - confirm.
# srcCouchServer is created here but not used anywhere in this fragment.
destCouchServer = CouchServer(dburl=destCouchHost)
srcCouchServer = CouchServer(dburl=srcCouchHost)

# Full URLs of the replication end points. "%2F" is the URL-encoded "/" between
# the database base name and its "jobs" / "fwjrs" part.
srcJobsDb = srcCouchHost + "/" + srcDbBase + "%2Fjobs"
destJobsDb = destCouchHost + "/" + destDbBase + "%2Fjobs"
srcFwjrsDb = srcCouchHost + "/" + srcDbBase + "%2Ffwjrs"
destFwjrsDb = destCouchHost + "/" + destDbBase + "%2Ffwjrs"

print("Archiving %s/%s to %s/%s..." %
      (srcCouchHost, srcDbBase, destCouchHost, destDbBase))

# Replicate the FWJR and Jobs databases. Both replications are requested through
# the destination server with create_target=True.
print("  Replicating jobs database...")
destCouchServer.replicate(srcJobsDb, destJobsDb, create_target=True)
print("  Replication fwjrs database...")
destCouchServer.replicate(srcFwjrsDb, destFwjrsDb, create_target=True)

# Generate views for the various databases.
# From here on destJobsDb / destFwjrsDb are rebound from URL strings to database
# handles on the destination server. Each view is queried with limit=1: going by
# the messages printed here, the point is to trigger view generation, the result
# itself is discarded.
destJobsDb = destCouchServer.connectDatabase(destDbBase + "/jobs")
destFwjrsDb = destCouchServer.connectDatabase(destDbBase + "/fwjrs")
print("  Triggering view generation for jobs database...")
destJobsDb.loadView("JobDump", "statusByWorkflowName", options={"limit": 1})
print("  Triggering view generation for fwjrs database...")
destFwjrsDb.loadView("FWJRDump", "outputByWorkflowName", options={"limit": 1})

print("")
# Query destination DB for list of workflows
# URL templates, presumably filled in with the % operator using the values named
# in the trailing comments: under %-formatting every "%%" collapses to a literal
# "%", which yields URL escapes such as %2F ("/"), %5B ("["), %22 ('"').
summaryBase = "%s/%s%%2Ffwjrs/_design/FWJRDump/_show/workflowSummary/%s"  # dest host, dest db base, workflow name
successBase = "%s/%s%%2Fjobs/_design/JobDump/_list/successJobs/statusByWorkflowName?startkey=%%5B%%22%s%%22%%5D&endkey=%%5B%%22%s%%22%%2C%%7B%%7D%%5D&reduce=false"  # dest host, dest db base, workflow, workflow
Esempio n. 13
0
class WMStatsWriter(WMStatsReader):
    def __init__(self, couchURL, dbName=None):
        """
        Set up the connections used for the local couchDB calls.

        Connects to the target database and to the '_replicator' database of
        the same server.

        :param couchURL: server URL when `dbName` is given; otherwise a URL
            which splitCouchServiceURL splits into (server URL, database name)
        :param dbName: database name, or None to take it from `couchURL`
        """
        if not dbName:
            couchURL, dbName = splitCouchServiceURL(couchURL)
        self.couchURL = couchURL
        self.dbName = dbName
        self.couchServer = CouchServer(self.couchURL)
        self.couchDB = self.couchServer.connectDatabase(self.dbName, False)
        self.replicatorDB = self.couchServer.connectDatabase('_replicator',
                                                             False)

    def uploadData(self, docs):
        """
        Upload to the given couchURL using cert and key authentication and
        authorization.

        :param docs: a single document (a dict or any dict subclass) or an
            iterable of documents
        :return: whatever the database commit returns when called with
            returndocs=True
        """
        # add delete docs as well for the compaction
        # need to check whether delete and update is successful
        # isinstance (not an exact type comparison) so that a lone dict
        # subclass is queued as one document instead of being iterated by key
        if isinstance(docs, dict):
            docs = [docs]
        for doc in docs:
            self.couchDB.queue(doc)
        return self.couchDB.commit(returndocs=True)

    def insertRequest(self, schema):
        """
        Insert a request given its schema.

        The schema is converted with monitorDocFromRequestSchema and the
        resulting document is handed to insertGenericRequest.

        :return: the result of insertGenericRequest
        """
        return self.insertGenericRequest(monitorDocFromRequestSchema(schema))

    def insertGenericRequest(self, doc):
        """
        Insert a request document and set its status to "new".

        The JSON encoded document is sent to the 'insertRequest' update
        handler of the 'WMStats' design, addressed by the document '_id'.

        :param doc: request document, must have an '_id' entry
        :return: the result of the 'insertRequest' update call
        """
        requestName = doc['_id']
        encodedDoc = JSONEncoder().encode(doc)
        result = self.couchDB.updateDocument(requestName,
                                             'WMStats',
                                             'insertRequest',
                                             fields={'doc': encodedDoc})
        self.updateRequestStatus(requestName, "new")
        return result

    def updateRequestStatus(self, request, status):
        """
        Record a new status for a request.

        Sends the status together with the current time (in whole seconds)
        JSON encoded to the 'requestStatus' update handler of 'WMStats'.

        :param request: id of the request document
        :param status: the new status
        :return: the result of the update call
        """
        updateTime = int(time.time())
        encodedStatus = JSONEncoder().encode({'status': status,
                                              'update_time': updateTime})
        return self.couchDB.updateDocument(
            request,
            'WMStats',
            'requestStatus',
            fields={'request_status': encodedStatus})

    def updateTeam(self, request, team):
        """
        Send `team` to the 'team' update handler of 'WMStats' for `request`.

        :return: the result of the update call
        """
        teamFields = {'team': team}
        return self.couchDB.updateDocument(request, 'WMStats', 'team',
                                           fields=teamFields)

    def insertTotalStats(self, request, totalStats):
        """
        Update the total stats of the given workflow (total_jobs,
        input_events, input_lumis, input_num_files).

        :param request: id of the request document
        :param totalStats: passed as is as the fields of the 'totalStats'
            update handler of 'WMStats'
        :return: the result of the update call
        """
        updateDocument = self.couchDB.updateDocument
        return updateDocument(request, 'WMStats', 'totalStats',
                              fields=totalStats)

    def updateFromWMSpec(self, spec):
        """
        Update the general fields of a request from its workload spec.

        Currently only the priority, the site white list and the output
        datasets are updated. They are sent JSON encoded (complex fields need
        to be encoded) to the 'generalFields' update handler of 'WMStats',
        addressed by the spec name.

        :param spec: workload spec providing priority(), getTopLevelTask(),
            listOutputDatasets() and name()
        :return: the result of the update call
        """
        generalFields = {}
        # priority is priority + user priority + group priority
        generalFields['priority'] = spec.priority()
        # assuming all the toplevel tasks have the same site white list
        firstTopLevelTask = spec.getTopLevelTask()[0]
        generalFields['site_white_list'] = firstTopLevelTask.siteWhitelist()
        generalFields['outputdatasets'] = spec.listOutputDatasets()
        workflow = spec.name()
        encodedFields = JSONEncoder().encode(generalFields)
        return self.couchDB.updateDocument(
            workflow,
            'WMStats',
            'generalFields',
            fields={'general_fields': encodedFields})

    def updateRequestsInfo(self, docs):
        """
        Bulk update for request documents.
        TODO: change to bulk update handler when it gets supported

        The 'type' entry is removed from each document (in place), the rest
        is sent JSON encoded to the 'generalFields' update handler of
        'WMStats', addressed by the document 'workflow' entry.

        :param docs: iterable of dicts, each with 'type' and 'workflow' entries
        """
        for requestDoc in docs:
            del requestDoc['type']
            workflow = requestDoc['workflow']
            encodedDoc = JSONEncoder().encode(requestDoc)
            self.couchDB.updateDocument(workflow,
                                        'WMStats',
                                        'generalFields',
                                        fields={'general_fields': encodedDoc})

    def updateAgentInfo(self, agentInfo):
        """
        Send the agent information to the 'agentInfo' update handler.

        :param agentInfo: dict with an '_id' entry; it is sent JSON encoded
            to the document with that id
        :return: the result of the update call
        """
        agentDocId = agentInfo['_id']
        encodedInfo = JSONEncoder().encode(agentInfo)
        return self.couchDB.updateDocument(agentDocId,
                                           'WMStats',
                                           'agentInfo',
                                           fields={'agent_info': encodedInfo})

    def deleteOldDocs(self, days):
        """
        Delete the documents from wmstats db older than param 'days'.

        The documents to delete are taken from the 'time' view, read in
        descending order starting at (now - days); they are queued for
        deletion and committed in one go.

        :param days: age limit in days (a fractional value is accepted)
        :return: the string "nothing" when the commit returned nothing;
            otherwise {'delete': <number of entries without error>,
            'error': {<error>: <number of entries reporting it>}}
        """
        sec = int(days * 24 * 60 * 60)
        threshold = int(time.time()) - sec
        options = {
            "startkey": threshold,
            "descending": True,
            "stale": "update_after"
        }
        result = self.couchDB.loadView("WMStats", "time", options)

        for row in result['rows']:
            self.couchDB.queueDelete({'_id': row['value']['id'],
                                      '_rev': row['value']['rev']})
        committed = self.couchDB.commit()

        if not committed:
            return "nothing"

        errorReport = {}
        deleted = 0
        for data in committed:
            # "in" instead of dict.has_key(), which does not exist in python3
            if 'error' in data:
                errorReport[data['error']] = errorReport.get(data['error'], 0) + 1
            else:
                deleted += 1
        return {'delete': deleted, 'error': errorReport}

    def replicate(self, target):
        """
        Ask the couch server to replicate this database to `target`.

        The replication is requested as continuous, filtered with
        'WMStats/repfilter' and with useReplicator=True.

        :param target: replication destination, passed on unchanged
        :return: the result of the server replicate call
        """
        replicationArgs = {
            'continuous': True,
            'filter': 'WMStats/repfilter',
            'useReplicator': True,
        }
        return self.couchServer.replicate(self.dbName, target,
                                          **replicationArgs)

    def getDBInstance(self):
        """Return the database handle stored in self.couchDB."""
        return self.couchDB

    def getServerInstance(self):
        """Return the server handle stored in self.couchServer."""
        return self.couchServer

    def getActiveTasks(self):
        """Return the 'active_tasks' entry of the couch server status."""
        return self.couchServer.status()['active_tasks']

    def deleteReplicatorDocs(self):
        repDocs = self.replicatorDB.allDocs()['rows']
        for j in repDocs:
            if not j['id'].startswith('_'):
                doc = {}
                doc["_id"] = j['id']
                doc["_rev"] = j['value']['rev']
                self.replicatorDB.queueDelete(doc)
        committed = self.replicatorDB.commit()