class WorkQueueBackend(object):
    """Represents persistent storage for WorkQueue.

    Holds the couch server connection, the queue database and its inbox
    database, and knows how to replicate from an optional parent queue.
    """

    def __init__(self, db_url, db_name='workqueue', inbox_name=None,
                 parentQueue=None, queueUrl=None, logger=None):
        """Connect to the queue and inbox databases on ``db_url``.

        :param db_url: URL of the couch server hosting the databases.
        :param db_name: name of the queue database.
        :param inbox_name: name of the inbox database; defaults to
            ``"<db_name>_inbox"``.
        :param parentQueue: URL of the parent queue, or None if there is none.
        :param queueUrl: URL this queue is identified by; defaults to
            ``db_url + '/' + db_name``.
        :param logger: logger to use; the ``logging`` module is used if falsy.
        """
        if logger:
            self.logger = logger
        else:
            import logging
            self.logger = logging

        if inbox_name is None:
            inbox_name = "%s_inbox" % db_name

        self.server = CouchServer(db_url)
        # Keep the parent URL exactly as given next to its sanitized form.
        self.parentCouchUrlWithAuth = parentQueue
        if parentQueue:
            self.parentCouchUrl = sanitizeURL(parentQueue)['url']
        else:
            self.parentCouchUrl = None
        self.db = self.server.connectDatabase(db_name, create=False, size=10000)
        self.hostWithAuth = db_url
        self.inbox = self.server.connectDatabase(inbox_name, create=False, size=10000)
        self.queueUrl = sanitizeURL(queueUrl or (db_url + '/' + db_name))['url']

    def forceQueueSync(self):
        """Force a one-shot replication in both directions - mainly for tests."""
        self.pullFromParent(continuous=False)
        # NOTE(review): sendToParent is not defined in the visible part of
        # this class - confirm it exists before relying on this method.
        self.sendToParent(continuous=False)

    def pullFromParent(self, continuous=True, cancel=False):
        """Replicate from the parent couch into the local inbox.

        Nothing is done unless both a parent URL and a queue URL are set.
        Any failure is logged as a warning and never propagated.

        :param continuous: passed through to the server's ``replicate`` call.
        :param cancel: passed through to the server's ``replicate`` call.
        """
        try:
            if self.parentCouchUrl and self.queueUrl:
                self.server.replicate(source=self.parentCouchUrl,
                                      destination="%s/%s" % (self.hostWithAuth, self.inbox.name),
                                      filter='WorkQueue/queueFilter',
                                      query_params={'childUrl': self.queueUrl,
                                                    'parentUrl': self.parentCouchUrl},
                                      continuous=continuous,
                                      cancel=cancel,
                                      useReplicator=True)
        except Exception as ex:
            # Best effort: a failed replication must not break the caller.
            self.logger.warning('Replication from %s failed: %s' % (self.parentCouchUrl, str(ex)))
class WMStatsReader(object):
    """Helper for querying (and replicating) a WMStats couch database."""

    def __init__(self, couchURL, dbName=None):
        """Connect to the WMStats database.

        :param couchURL: couch server URL when ``dbName`` is given, otherwise
            a URL that ``splitCouchServiceURL`` splits into server URL and
            database name.
        :param dbName: database name, or None to derive it from ``couchURL``.
        """
        couchURL = sanitizeURL(couchURL)['url']
        # set the connection for local couchDB call
        if dbName:
            self.couchURL = couchURL
            self.dbName = dbName
        else:
            self.couchURL, self.dbName = splitCouchServiceURL(couchURL)
        self.couchServer = CouchServer(self.couchURL)
        # Reuse the server object created above rather than building a second one.
        self.couchDB = self.couchServer.connectDatabase(self.dbName, False)

    def workflowsByStatus(self, statusList):
        """Return the ids of the rows of the ``requestByStatus`` view.

        :param statusList: list of statuses passed to the view as keys.
        :returns: list with the ``id`` of every returned row.
        """
        options = {"stale": "update_after"}
        result = self.couchDB.loadView("WMStats", "requestByStatus", options, statusList)
        return [item["id"] for item in result["rows"]]

    def replicate(self, target):
        """Start a continuous replication of this database to ``target``."""
        self.couchServer.replicate(self.dbName, target, continuous=True)
class WorkQueueBackend(object):
    """Represents persistent storage for WorkQueue.

    Holds the couch server connection, the queue database and its inbox
    database, and knows how to replicate from an optional parent queue.
    """

    def __init__(self, db_url, db_name='workqueue', inbox_name=None,
                 parentQueue=None, queueUrl=None, logger=None):
        """Connect to the queue and inbox databases on ``db_url``.

        :param db_url: URL of the couch server hosting the databases.
        :param db_name: name of the queue database.
        :param inbox_name: name of the inbox database; defaults to
            ``"<db_name>_inbox"``.
        :param parentQueue: URL of the parent queue, or None if there is none.
        :param queueUrl: URL this queue is identified by; defaults to
            ``db_url + '/' + db_name``.
        :param logger: logger to use; the ``logging`` module is used if falsy.
        """
        if logger:
            self.logger = logger
        else:
            import logging
            self.logger = logging

        if inbox_name is None:
            inbox_name = "%s_inbox" % db_name

        self.server = CouchServer(db_url)
        # Keep the parent URL exactly as given next to its sanitized form.
        self.parentCouchUrlWithAuth = parentQueue
        if parentQueue:
            self.parentCouchUrl = sanitizeURL(parentQueue)['url']
        else:
            self.parentCouchUrl = None
        self.db = self.server.connectDatabase(db_name, create=False, size=10000)
        self.hostWithAuth = db_url
        self.inbox = self.server.connectDatabase(inbox_name, create=False, size=10000)
        self.queueUrl = sanitizeURL(queueUrl or (db_url + '/' + db_name))['url']

    def forceQueueSync(self):
        """Force a one-shot replication in both directions - mainly for tests."""
        self.pullFromParent(continuous=False)
        # NOTE(review): sendToParent is not defined in the visible part of
        # this class - confirm it exists before relying on this method.
        self.sendToParent(continuous=False)

    def pullFromParent(self, continuous=True, cancel=False):
        """Replicate from the parent couch into the local inbox.

        Nothing is done unless both a parent URL and a queue URL are set.
        Any failure is logged as a warning and never propagated.

        :param continuous: passed through to the server's ``replicate`` call.
        :param cancel: passed through to the server's ``replicate`` call.
        """
        try:
            if self.parentCouchUrl and self.queueUrl:
                self.server.replicate(source=self.parentCouchUrl,
                                      destination="%s/%s" % (self.hostWithAuth, self.inbox.name),
                                      filter='WorkQueue/queueFilter',
                                      query_params={'childUrl': self.queueUrl,
                                                    'parentUrl': self.parentCouchUrl},
                                      continuous=continuous,
                                      cancel=cancel,
                                      useReplicator=True)
        except Exception as ex:
            # Best effort: a failed replication must not break the caller.
            self.logger.warning('Replication from %s failed: %s' % (self.parentCouchUrl, str(ex)))
class WorkQueueBackend(object):
    """Represents persistent storage for WorkQueue.

    Wraps the couch databases behind a queue (the queue database and its
    inbox) and offers element insertion, lookup, update and deletion, plus
    replication to and from an optional parent queue.
    """

    def __init__(self, db_url, db_name='workqueue', inbox_name=None,
                 parentQueue=None, queueUrl=None, logger=None):
        """Connect to the queue and inbox databases on ``db_url``.

        :param db_url: URL of the couch server hosting the databases.
        :param db_name: name of the queue database.
        :param inbox_name: name of the inbox database; defaults to
            ``"<db_name>_inbox"``.
        :param parentQueue: URL of the parent queue, or None if there is none.
        :param queueUrl: URL this queue is identified by; defaults to
            ``db_url + '/' + db_name``.
        :param logger: logger to use; the ``logging`` module is used if falsy.
        """
        if logger:
            self.logger = logger
        else:
            import logging
            self.logger = logging

        if inbox_name is None:
            inbox_name = "%s_inbox" % db_name

        self.server = CouchServer(db_url)
        # Keep the parent URL exactly as given next to its sanitized form.
        self.parentCouchUrlWithAuth = parentQueue
        if parentQueue:
            self.parentCouchUrl = sanitizeURL(parentQueue)['url']
        else:
            self.parentCouchUrl = None
        self.db = self.server.connectDatabase(db_name, create=False, size=10000)
        self.hostWithAuth = db_url
        self.inbox = self.server.connectDatabase(inbox_name, create=False, size=10000)
        self.queueUrl = sanitizeURL(queueUrl or (db_url + '/' + db_name))['url']
        # Key under which element parameters are passed to bulk updates.
        self.eleKey = 'WMCore.WorkQueue.DataStructs.WorkQueueElement.WorkQueueElement'

    def forceQueueSync(self):
        """Force a one-shot replication in both directions - used only in tests."""
        self.pullFromParent(continuous=False)
        self.sendToParent(continuous=False)

    def pullFromParent(self, continuous=True, cancel=False):
        """Replicate from the parent couch into the local inbox - used only in tests.

        Nothing is done unless both a parent URL and a queue URL are set.
        Any failure is logged as a warning and never propagated.
        """
        try:
            if self.parentCouchUrl and self.queueUrl:
                self.server.replicate(source=self.parentCouchUrl,
                                      destination="%s/%s" % (self.hostWithAuth, self.inbox.name),
                                      filter='WorkQueue/queueFilter',
                                      query_params={'childUrl': self.queueUrl,
                                                    'parentUrl': self.parentCouchUrl},
                                      continuous=continuous,
                                      cancel=cancel)
        except Exception as ex:
            # Best effort: a failed replication must not break the caller.
            self.logger.warning('Replication from %s failed: %s' % (self.parentCouchUrl, str(ex)))

    def sendToParent(self, continuous=True, cancel=False):
        """Replicate the local inbox to the parent couch - used only in tests.

        Nothing is done unless both a parent URL and a queue URL are set.
        Any failure is logged as a warning and never propagated.
        """
        try:
            if self.parentCouchUrl and self.queueUrl:
                self.server.replicate(source="%s" % self.inbox.name,
                                      destination=self.parentCouchUrlWithAuth,
                                      filter='WorkQueue/queueFilter',
                                      query_params={'childUrl': self.queueUrl,
                                                    'parentUrl': self.parentCouchUrl},
                                      continuous=continuous,
                                      cancel=cancel)
        except Exception as ex:
            # Best effort: a failed replication must not break the caller.
            self.logger.warning('Replication to %s failed: %s' % (self.parentCouchUrl, str(ex)))

    def getElementsForSplitting(self):
        """Return the inbox elements in 'Negotiating' status.

        Each returned element gets its 'WMSpec' loaded from the parent queue;
        a spec shared by several elements is loaded only once.
        """
        elements = self.getInboxElements(status='Negotiating')
        specs = {}  # cache as may have multiple elements for same spec
        for ele in elements:
            if ele['RequestName'] not in specs:
                wmspec = WMWorkloadHelper()
                wmspec.load(self.parentCouchUrlWithAuth + "/%s/spec" % ele['RequestName'])
                specs[ele['RequestName']] = wmspec
            ele['WMSpec'] = specs[ele['RequestName']]
        del specs
        return elements

    def insertWMSpec(self, wmspec):
        """Insert WMSpec to backend and return the result of ``saveCouch``."""
        # Can't save spec to inbox, it needs to be visible to child queues
        # Can't save empty dict so add dummy variable
        dummy_values = {'name': wmspec.name()}
        # change specUrl in spec before saving (otherwise it points to previous url)
        wmspec.setSpecUrl(self.db['host'] + "/%s/%s/spec" % (self.db.name, wmspec.name()))
        return wmspec.saveCouch(self.hostWithAuth, self.db.name, dummy_values)

    def getWMSpec(self, name):
        """Load and return the spec called ``name`` from the queue database."""
        wmspec = WMWorkloadHelper()
        wmspec.load(self.db['host'] + "/%s/%s/spec" % (self.db.name, name))
        return wmspec

    def insertElements(self, units, parent=None):
        """Insert elements into the queue database.

        :param units: elements to insert; all are assumed to share the spec
            of the first one, which is stored separately.
        :param parent: the parent WorkQueueObject these elements belong to,
            i.e. a workflow which has been split.
        :returns: list of the elements newly inserted (elements whose id
            already exists are skipped), or None when ``units`` is empty.
        """
        if not units:
            return
        # store spec file separately - assume all elements share same spec
        self.insertWMSpec(units[0]['WMSpec'])
        newUnitsInserted = []
        for unit in units:
            # cast to couch
            if not isinstance(unit, CouchWorkQueueElement):
                unit = CouchWorkQueueElement(self.db, elementParams=dict(unit))
            if parent:
                unit['ParentQueueId'] = parent.id
                unit['TeamName'] = parent['TeamName']
                unit['WMBSUrl'] = parent['WMBSUrl']
            if unit._couch.documentExists(unit.id):
                self.logger.info('Element "%s" already exists, skip insertion.' % unit.id)
                continue
            newUnitsInserted.append(unit)
            unit.save()
        # units is non-empty here, so ``unit`` is always bound.
        unit._couch.commit(all_or_nothing=True)
        return newUnitsInserted

    def createWork(self, spec, **kwargs):
        """Return the Inbox element for this spec.

        This does not persist it to the database.
        """
        kwargs.update({'WMSpec': spec,
                       'RequestName': spec.name(),
                       'StartPolicy': spec.startPolicyParameters(),
                       'EndPolicy': spec.endPolicyParameters(),
                       'OpenForNewData': False})
        unit = CouchWorkQueueElement(self.inbox, elementParams=kwargs)
        unit.id = spec.name()
        return unit

    def getElements(self, status=None, elementIDs=None, returnIdOnly=False,
                    db=None, loadSpec=False, WorkflowName=None, **elementFilters):
        """Return elements that match requirements.

        status, elementIDs & filters are 'AND'ed together to filter elements.
        returnIdOnly causes the element not to be loaded and only the id returned
        db is used to specify which database to return from
        loadSpec causes the workflow for each spec to be loaded.
        WorkflowName may be used in the place of RequestName

        :raises ValueError: if elementIDs is combined with status, extra
            filters or returnIdOnly.
        """
        key = []
        if not db:
            db = self.db
        if elementFilters.get('RequestName') and not WorkflowName:
            WorkflowName = elementFilters.pop('RequestName')

        if elementIDs:
            if elementFilters or status or returnIdOnly:
                raise ValueError(
                    "Can't specify extra filters (or return id's) when using element id's with getElements()")
            elements = [CouchWorkQueueElement(db, i).load() for i in elementIDs]
        else:
            options = {'include_docs': True,
                       'filter': elementFilters,
                       'idOnly': returnIdOnly,
                       'reduce': False}
            # filter on workflow or status if possible
            filterName = 'elementsByWorkflow'
            if WorkflowName:
                key.append(WorkflowName)
            elif status:
                filterName = 'elementsByStatus'
                key.append(status)
            elif elementFilters.get('SubscriptionId'):
                key.append(elementFilters['SubscriptionId'])
                filterName = 'elementsBySubscription'
            # add given params to filters
            if status:
                options['filter']['Status'] = status
            if WorkflowName:
                options['filter']['RequestName'] = WorkflowName

            view = db.loadList('WorkQueue', 'filter', filterName, options, key)
            view = json.loads(view)
            if returnIdOnly:
                return view
            elements = [CouchWorkQueueElement.fromDocument(db, row) for row in view]

        if loadSpec:
            specs = {}  # cache as may have multiple elements for same spec
            for ele in elements:
                if ele['RequestName'] not in specs:
                    wmspec = self.getWMSpec(ele['RequestName'])
                    specs[ele['RequestName']] = wmspec
                ele['WMSpec'] = specs[ele['RequestName']]
            del specs
        return elements

    def getInboxElements(self, *args, **kwargs):
        """Return elements from Inbox, supports same semantics as getElements()."""
        return self.getElements(*args, db=self.inbox, **kwargs)

    def getElementsForWorkflow(self, workflow):
        """Get elements for a workflow."""
        elements = self.db.loadView('WorkQueue', 'elementsByWorkflow',
                                    {'key': workflow, 'include_docs': True, 'reduce': False})
        return [CouchWorkQueueElement.fromDocument(self.db, x['doc'])
                for x in elements.get('rows', [])]

    def getElementsForParent(self, parent):
        """Get elements with the given parent."""
        elements = self.db.loadView('WorkQueue', 'elementsByParent',
                                    {'key': parent.id, 'include_docs': True})
        return [CouchWorkQueueElement.fromDocument(self.db, x['doc'])
                for x in elements.get('rows', [])]

    def saveElements(self, *elements):
        """Persist elements.

        Returns elements successfully saved, user must verify to catch errors.
        Failures reported by couch are logged as errors.
        """
        result = []
        if not elements:
            return result
        for element in elements:
            element.save()
        # The commit goes through the first element's database handle.
        answer = elements[0]._couch.commit()
        result, failures = formatReply(answer, *elements)
        msg = 'Couch error saving element: "%s", error "%s", reason "%s"'
        for failed in failures:
            self.logger.error(msg % (failed['id'], failed['error'], failed['reason']))
        return result

    def _raiseConflictErrorAndLog(self, conflictIDs, updatedParams, dbName="workqueue"):
        """Log the conflicting ids and raise WorkQueueError for them."""
        errorMsg = "Need to update this element manually from %s\n ids:%s\n, parameters:%s\n" % (
            dbName, conflictIDs, updatedParams)
        self.logger.error(errorMsg)
        raise WorkQueueError(errorMsg)

    def updateElements(self, *elementIds, **updatedParams):
        """Update given elements (identified by id) with new parameters.

        :raises WorkQueueError: if the bulk update reports conflicting ids.
        """
        if not elementIds:
            return
        eleParams = {}
        eleParams[self.eleKey] = updatedParams
        conflictIDs = self.db.updateBulkDocumentsWithConflictHandle(elementIds, eleParams)
        if conflictIDs:
            self._raiseConflictErrorAndLog(conflictIDs, updatedParams)
        return

    def updateInboxElements(self, *elementIds, **updatedParams):
        """Update given inbox elements (identified by id) with new parameters.

        :raises WorkQueueError: if the bulk update reports conflicting ids.
        """
        if not elementIds:
            return
        eleParams = {}
        eleParams[self.eleKey] = updatedParams
        conflictIDs = self.inbox.updateBulkDocumentsWithConflictHandle(elementIds, eleParams)
        if conflictIDs:
            self._raiseConflictErrorAndLog(conflictIDs, updatedParams, "workqueue_inbox")
        return

    def deleteElements(self, *elements):
        """Delete elements, then delete the spec of any workflow left without elements."""
        if not elements:
            return
        specs = {}
        for i in elements:
            i.delete()
            specs[i['RequestName']] = None
        answer = elements[0]._couch.commit()
        _, failures = formatReply(answer, *elements)
        msg = 'Couch error deleting element: "%s", error "%s", reason "%s"'
        for failed in failures:
            # only count delete as failed if document still exists
            if elements[0]._couch.documentExists(failed['id']):
                self.logger.error(msg % (failed['id'], failed['error'], failed['reason']))
        # delete specs if no longer used
        for wf in specs:
            try:
                if not self.db.loadView('WorkQueue', 'elementsByWorkflow',
                                        {'key': wf, 'limit': 1, 'reduce': False})['rows']:
                    self.db.delete_doc(wf)
            except CouchNotFoundError:
                # The spec document is already gone - nothing left to delete.
                pass

    def availableWork(self, thresholds, siteJobCounts, team=None, wfs=None,
                      excludeWorkflows=None, numElems=9999999):
        """Get work which is available to be run.

        Assume thresholds is a dictionary; keys are the site name, values are
        the maximum number of running jobs at that site.

        Assumes site_job_counts is a dictionary-of-dictionaries; keys are the
        site name and task priorities. The value is the number of jobs
        running at that priority.

        It will pull work until it reaches the number of elements configured
        (numElems). Since it's also used for calculating free resources,
        default it to "infinity".

        Note: this method will be called with no limit of work elements when
        it's simply calculating the resources available (based on what is in
        LQ), before it gets work from GQ.

        :returns: tuple ``(elements, thresholds, siteJobCounts)``;
            ``siteJobCounts`` is updated in place with the selected work.
        """
        self.logger.info("Getting up to %d available work from %s", numElems, self.queueUrl)
        excludeWorkflows = excludeWorkflows or []
        elements = []
        sortedElements = []

        # We used to pre-filter sites, looking to see if there are idle job slots
        # We don't do this anymore, as we may over-allocate
        # jobs to sites if the new jobs have a higher priority.

        # If there are no sites, punt early.
        if not thresholds:
            self.logger.error("No thresholds is set: Please check")
            return elements, thresholds, siteJobCounts

        options = {}
        options['include_docs'] = True
        options['descending'] = True
        options['resources'] = thresholds
        if team:
            options['team'] = team
            self.logger.info("setting team to %s", team)

        if wfs:
            result = []
            # Query in slices of 20 workflows; range() works on Python 2 and 3.
            for i in range(0, len(wfs), 20):
                options['wfs'] = wfs[i:i + 20]
                data = self.db.loadList('WorkQueue', 'workRestrictions', 'availableByPriority', options)
                result.extend(json.loads(data))
        else:
            result = self.db.loadList('WorkQueue', 'workRestrictions', 'availableByPriority', options)
            result = json.loads(result)

        if len(result) == 0:
            self.logger.info(
                "No available work in WQ or didn't pass workqueue restriction"
                " - check Pileup, site white list, etc")
        self.logger.debug("Available Work:\n %s \n for resources\n %s", result, thresholds)

        # Iterate through the results; apply whitelist / blacklist / data
        # locality restrictions. Only assign jobs if they are high enough
        # priority.
        for i in result:
            element = CouchWorkQueueElement.fromDocument(self.db, i)
            # leave out elements of the excluded workflows
            if element['RequestName'] not in excludeWorkflows:
                sortedElements.append(element)

        # sort elements to get them in priority first and timestamp order
        # (two stable sorts: the creation-time order survives within a priority)
        sortedElements.sort(key=lambda element: element['CreationTime'])
        sortedElements.sort(key=lambda x: x['Priority'], reverse=True)

        for element in sortedElements:
            if numElems <= 0:
                self.logger.info("Reached the maximum number of elements to be pulled: %d", len(elements))
                break

            if not possibleSites(element):
                self.logger.info("No possible sites for %s with doc id %s",
                                 element['RequestName'], element.id)
                continue

            prio = element['Priority']
            possibleSite = None
            # shuffle() needs a mutable sequence, so copy the keys into a list.
            sites = list(thresholds)
            random.shuffle(sites)
            for site in sites:
                if element.passesSiteRestriction(site):
                    # Count the number of jobs currently running of equal or greater priority
                    curJobCount = sum(count for jobPrio, count in siteJobCounts.get(site, {}).items()
                                      if jobPrio >= prio)
                    self.logger.debug("Job Count: %s, site: %s thresholds: %s",
                                      curJobCount, site, thresholds[site])
                    if curJobCount < thresholds[site]:
                        possibleSite = site
                        break

            if possibleSite:
                numElems -= 1
                self.logger.debug("Possible site exists %s", str(possibleSite))
                elements.append(element)
                if possibleSite not in siteJobCounts:
                    siteJobCounts[possibleSite] = {}
                siteJobCounts[possibleSite][prio] = siteJobCounts[possibleSite].setdefault(prio, 0) + \
                    element['Jobs'] * element.get('blowupFactor', 1.0)
            else:
                self.logger.debug("No available resources for %s with doc id %s",
                                  element['RequestName'], element.id)

        return elements, thresholds, siteJobCounts

    def getActiveData(self):
        """Get data items we have work in the queue for."""
        data = self.db.loadView('WorkQueue', 'activeData', {'reduce': True, 'group': True})
        return [{'dbs_url': x['key'][0], 'name': x['key'][1]} for x in data.get('rows', [])]

    def getActiveParentData(self):
        """Get data items we have work in the queue for with parent."""
        data = self.db.loadView('WorkQueue', 'activeParentData', {'reduce': True, 'group': True})
        return [{'dbs_url': x['key'][0], 'name': x['key'][1]} for x in data.get('rows', [])]

    def getActivePileupData(self):
        """Get data items we have work in the queue for with pileup."""
        data = self.db.loadView('WorkQueue', 'activePileupData', {'reduce': True, 'group': True})
        return [{'dbs_url': x['key'][0], 'name': x['key'][1]} for x in data.get('rows', [])]

    def getElementsForData(self, data):
        """Get active elements for this dbs & data combo."""
        elements = self.db.loadView('WorkQueue', 'elementsByData',
                                    {'key': data, 'include_docs': True})
        return [CouchWorkQueueElement.fromDocument(self.db, x['doc'])
                for x in elements.get('rows', [])]

    def getElementsForParentData(self, data):
        """Get active elements for this data."""
        elements = self.db.loadView('WorkQueue', 'elementsByParentData',
                                    {'key': data, 'include_docs': True})
        return [CouchWorkQueueElement.fromDocument(self.db, x['doc'])
                for x in elements.get('rows', [])]

    def getElementsForPileupData(self, data):
        """Get active elements for this data."""
        elements = self.db.loadView('WorkQueue', 'elementsByPileupData',
                                    {'key': data, 'include_docs': True})
        return [CouchWorkQueueElement.fromDocument(self.db, x['doc'])
                for x in elements.get('rows', [])]

    def isAvailable(self):
        """Is the server available, i.e. up and not compacting."""
        try:
            compacting = self.db.info()['compact_running']
            if compacting:
                self.logger.info("CouchDB compacting - try again later.")
                return False
        except Exception as ex:
            # Any failure to fetch the database info counts as "unavailable".
            self.logger.error("CouchDB unavailable: %s" % str(ex))
            return False
        return True

    def getWorkflows(self, includeInbox=False, includeSpecs=False):
        """Return the workflows known to workqueue as a list without duplicates.

        :param includeInbox: also include workflows with elements in the inbox.
        :param includeSpecs: also include the keys of the specsByWorkflow view.
        """
        result = set(x['key'] for x in
                     self.db.loadView('WorkQueue', 'elementsByWorkflow', {'group': True})['rows'])
        if includeInbox:
            result = result | set(x['key'] for x in
                                  self.inbox.loadView('WorkQueue', 'elementsByWorkflow',
                                                      {'group': True})['rows'])
        if includeSpecs:
            result = result | set(x['key'] for x in
                                  self.db.loadView('WorkQueue', 'specsByWorkflow')['rows'])
        return list(result)

    def queueLength(self):
        """Return number of available elements."""
        return self.db.loadView('WorkQueue', 'availableByPriority', {'limit': 0})['total_rows']

    def fixConflicts(self):
        """Fix elements in conflict.

        Each local queue runs this to resolve its conflicts with global,
        resolution propagates up to global. Conflicting elements are merged
        into one element with others deleted.

        This will fail if elements are modified during the resolution -
        if this happens rerun.
        """
        for db in [self.inbox, self.db]:
            for row in db.loadView('WorkQueue', 'conflicts')['rows']:
                element_id = row['id']
                try:
                    conflicting_elements = [CouchWorkQueueElement.fromDocument(db, db.document(element_id, rev))
                                            for rev in row['value']]
                    fixed_elements = fixElementConflicts(*conflicting_elements)
                    if self.saveElements(fixed_elements[0]):
                        # delete others (if merged value update accepted)
                        self.saveElements(*fixed_elements[1:])
                except Exception as ex:
                    # Keep going: one unresolved conflict must not block the rest.
                    self.logger.error("Error resolving conflict for %s: %s" % (element_id, str(ex)))

    def recordTaskActivity(self, taskname, comment=''):
        """Record a task for monitoring.

        Stores the current time and ``comment`` under ``taskname`` in the
        'task_activity' document; a failed commit is only logged.
        """
        try:
            record = self.db.document('task_activity')
        except CouchNotFoundError:
            record = Document('task_activity')
        record.setdefault('tasks', {})
        record['tasks'].setdefault(taskname, {})
        record['tasks'][taskname]['timestamp'] = time.time()
        record['tasks'][taskname]['comment'] = comment
        try:
            self.db.commitOne(record)
        except Exception as ex:
            self.logger.error("Unable to update task %s freshness: %s" % (taskname, str(ex)))

    def getWMBSInjectStatus(self, request=None):
        """Return the WMBS injection status, for one request or for all.

        This service only provided by global queue except on draining agent.

        :param request: request name; when given a single value is returned,
            otherwise a list of ``{request: status}`` dictionaries.
        :raises WorkQueueNoMatchingElements: if ``request`` is given but the
            view has no row for it.
        """
        options = {'group': True, 'reduce': True}
        if request:
            options.update(key=request)
        data = self.db.loadView('WorkQueue', 'wmbsInjectStatusByRequest', options)
        if request:
            if data['rows']:
                injectionStatus = data['rows'][0]['value']
                inboxElement = self.getInboxElements(WorkflowName=request)
                requestOpen = inboxElement[0].get('OpenForNewData', False) if inboxElement else False
                return injectionStatus and not requestOpen
            else:
                raise WorkQueueNoMatchingElements("%s not found" % request)
        else:
            injectionStatus = dict((x['key'], x['value']) for x in data.get('rows', []))
            finalInjectionStatus = []
            for reqName in injectionStatus:
                inboxElement = self.getInboxElements(WorkflowName=reqName)
                requestOpen = inboxElement[0].get('OpenForNewData', False) if inboxElement else False
                finalInjectionStatus.append({reqName: injectionStatus[reqName] and not requestOpen})
            return finalInjectionStatus

    def getWorkflowNames(self, inboxFlag=False):
        """Get workflow names from workqueue db (from the inbox if inboxFlag is set)."""
        if inboxFlag:
            db = self.inbox
        else:
            db = self.db
        data = db.loadView('WorkQueue', 'elementsByWorkflow',
                           {'stale': "update_after", 'reduce': True, 'group': True})
        return [x['key'] for x in data.get('rows', [])]

    def deleteWQElementsByWorkflow(self, workflowNames):
        """Delete the workqueue elements belonging to the given workflow names.

        It doesn't check the status of the workflow, so be careful when using
        this: pass only workflows which have reached an end status.

        :param workflowNames: one workflow name or a list of names.
        :returns: number of element documents deleted from both databases.
        """
        deleted = 0
        dbs = [self.db, self.inbox]
        if not isinstance(workflowNames, list):
            workflowNames = [workflowNames]

        if len(workflowNames) == 0:
            return deleted

        options = {}
        options["stale"] = "update_after"
        options["reduce"] = False
        for couchdb in dbs:
            result = couchdb.loadView("WorkQueue", "elementsByWorkflow", options, workflowNames)
            ids = [entry["id"] for entry in result["rows"]]
            if ids:
                couchdb.bulkDeleteByIDs(ids)
                deleted += len(ids)
        # delete the workflow with spec from workqueue db
        for wf in workflowNames:
            self.db.delete_doc(wf)
        return deleted
class WorkQueueBackend(object): """ Represents persistent storage for WorkQueue """ def __init__(self, db_url, db_name = 'workqueue', inbox_name = None, parentQueue = None, queueUrl = None, logger = None): if logger: self.logger = logger else: import logging self.logger = logging if inbox_name == None: inbox_name = "%s_inbox" % db_name self.server = CouchServer(db_url) self.parentCouchUrlWithAuth = parentQueue if parentQueue: self.parentCouchUrl = sanitizeURL(parentQueue)['url'] else: self.parentCouchUrl = None self.db = self.server.connectDatabase(db_name, create = False, size = 10000) self.hostWithAuth = db_url self.inbox = self.server.connectDatabase(inbox_name, create = False, size = 10000) self.queueUrl = sanitizeURL(queueUrl or (db_url + '/' + db_name))['url'] def forceQueueSync(self): """Force a blocking replication - used only in tests""" self.pullFromParent(continuous = False) self.sendToParent(continuous = False) def pullFromParent(self, continuous = True, cancel = False): """Replicate from parent couch - blocking: used only int test""" try: if self.parentCouchUrl and self.queueUrl: self.server.replicate(source = self.parentCouchUrl, destination = "%s/%s" % (self.hostWithAuth, self.inbox.name), filter = 'WorkQueue/queueFilter', query_params = {'childUrl' : self.queueUrl, 'parentUrl' : self.parentCouchUrl}, continuous = continuous, cancel = cancel) except Exception as ex: self.logger.warning('Replication from %s failed: %s' % (self.parentCouchUrl, str(ex))) def sendToParent(self, continuous = True, cancel = False): """Replicate to parent couch - blocking: used only int test""" try: if self.parentCouchUrl and self.queueUrl: self.server.replicate(source = "%s" % self.inbox.name, destination = self.parentCouchUrlWithAuth, filter = 'WorkQueue/queueFilter', query_params = {'childUrl' : self.queueUrl, 'parentUrl' : self.parentCouchUrl}, continuous = continuous, cancel = cancel) except Exception as ex: self.logger.warning('Replication to %s failed: %s' % 
(self.parentCouchUrl, str(ex))) def getElementsForSplitting(self): """Returns the elements from the inbox that need to be split, if WorkflowName specified only return elements to split for that workflow""" elements = self.getInboxElements(status = 'Negotiating') specs = {} # cache as may have multiple elements for same spec for ele in elements: if ele['RequestName'] not in specs: wmspec = WMWorkloadHelper() wmspec.load(self.parentCouchUrlWithAuth + "/%s/spec" % ele['RequestName']) specs[ele['RequestName']] = wmspec ele['WMSpec'] = specs[ele['RequestName']] del specs return elements def insertWMSpec(self, wmspec): """ Insert WMSpec to backend """ # Can't save spec to inbox, it needs to be visible to child queues # Can't save empty dict so add dummy variable dummy_values = {'name' : wmspec.name()} # change specUrl in spec before saving (otherwise it points to previous url) wmspec.setSpecUrl(self.db['host'] + "/%s/%s/spec" % (self.db.name, wmspec.name())) return wmspec.saveCouch(self.hostWithAuth, self.db.name, dummy_values) def getWMSpec(self, name): """Get the spec""" wmspec = WMWorkloadHelper() wmspec.load(self.db['host'] + "/%s/%s/spec" % (self.db.name, name)) return wmspec def insertElements(self, units, parent = None): """ Insert element to database @param parent is the parent WorkQueueObject these element's belong to. i.e. a workflow which has been split """ if not units: return # store spec file separately - assume all elements share same spec self.insertWMSpec(units[0]['WMSpec']) for unit in units: # cast to couch if not isinstance(unit, CouchWorkQueueElement): unit = CouchWorkQueueElement(self.db, elementParams = dict(unit)) if parent: unit['ParentQueueId'] = parent.id unit['TeamName'] = parent['TeamName'] unit['WMBSUrl'] = parent['WMBSUrl'] if unit._couch.documentExists(unit.id): self.logger.info('Element "%s" already exists, skip insertion.' 
% unit.id) continue unit.save() unit._couch.commit(all_or_nothing = True) return def createWork(self, spec, **kwargs): """Return the Inbox element for this spec. This does not persist it to the database. """ kwargs.update({'WMSpec' : spec, 'RequestName' : spec.name(), 'StartPolicy' : spec.startPolicyParameters(), 'EndPolicy' : spec.endPolicyParameters(), 'OpenForNewData' : True }) unit = CouchWorkQueueElement(self.inbox, elementParams = kwargs) unit.id = spec.name() return unit def getElements(self, status = None, elementIDs = None, returnIdOnly = False, db = None, loadSpec = False, WorkflowName = None, **elementFilters): """Return elements that match requirements status, elementIDs & filters are 'AND'ed together to filter elements. returnIdOnly causes the element not to be loaded and only the id returned db is used to specify which database to return from loadSpec causes the workflow for each spec to be loaded. WorkflowName may be used in the place of RequestName """ key = [] if not db: db = self.db if elementFilters.get('RequestName') and not WorkflowName: WorkflowName = elementFilters.pop('RequestName') if elementIDs: if elementFilters or status or returnIdOnly: raise ValueError("Can't specify extra filters (or return id's) when using element id's with getElements()") elements = [CouchWorkQueueElement(db, i).load() for i in elementIDs] else: options = {'include_docs' : True, 'filter' : elementFilters, 'idOnly' : returnIdOnly, 'reduce' : False} # filter on workflow or status if possible filter = 'elementsByWorkflow' if WorkflowName: key.append(WorkflowName) elif status: filter = 'elementsByStatus' key.append(status) elif elementFilters.get('SubscriptionId'): key.append(elementFilters['SubscriptionId']) filter = 'elementsBySubscription' # add given params to filters if status: options['filter']['Status'] = status if WorkflowName: options['filter']['RequestName'] = WorkflowName view = db.loadList('WorkQueue', 'filter', filter, options, key) view = json.loads(view) 
if returnIdOnly: return view elements = [CouchWorkQueueElement.fromDocument(db, row) for row in view] if loadSpec: specs = {} # cache as may have multiple elements for same spec for ele in elements: if ele['RequestName'] not in specs: wmspec = self.getWMSpec(ele['RequestName']) specs[ele['RequestName']] = wmspec ele['WMSpec'] = specs[ele['RequestName']] del specs return elements def getInboxElements(self, *args, **kwargs): """ Return elements from Inbox, supports same semantics as getElements() """ return self.getElements(*args, db = self.inbox, **kwargs) def getElementsForWorkflow(self, workflow): """Get elements for a workflow""" elements = self.db.loadView('WorkQueue', 'elementsByWorkflow', {'key' : workflow, 'include_docs' : True, 'reduce' : False}) return [CouchWorkQueueElement.fromDocument(self.db, x['doc']) for x in elements.get('rows', [])] def getElementsForParent(self, parent): """Get elements with the given parent""" elements = self.db.loadView('WorkQueue', 'elementsByParent', {'key' : parent.id, 'include_docs' : True}) return [CouchWorkQueueElement.fromDocument(self.db, x['doc']) for x in elements.get('rows', [])] def saveElements(self, *elements): """Persist elements Returns elements successfully saved, user must verify to catch errors """ result = [] if not elements: return result for element in elements: element.save() answer = elements[0]._couch.commit() result, failures = formatReply(answer, *elements) msg = 'Couch error saving element: "%s", error "%s", reason "%s"' for failed in failures: self.logger.error(msg % (failed['id'], failed['error'], failed['reason'])) return result def updateElements(self, *elementIds, **updatedParams): """Update given element's (identified by id) with new parameters""" if not elementIds: return uri = "/" + self.db.name + "/_design/WorkQueue/_update/in-place/" optionsArg = {} if "options" in updatedParams: optionsArg.update(updatedParams.pop("options")) data = {"updates" : json.dumps(updatedParams), "options" : 
json.dumps(optionsArg)} for ele in elementIds: thisuri = uri + ele + "?" + urllib.urlencode(data) self.db.makeRequest(uri = thisuri, type = 'PUT') return def updateInboxElements(self, *elementIds, **updatedParams): """Update given inbox element's (identified by id) with new parameters""" uri = "/" + self.inbox.name + "/_design/WorkQueue/_update/in-place/" optionsArg = {} if "options" in updatedParams: optionsArg.update(updatedParams.pop("options")) data = {"updates" : json.dumps(updatedParams), "options" : json.dumps(optionsArg)} for ele in elementIds: thisuri = uri + ele + "?" + urllib.urlencode(data) self.inbox.makeRequest(uri = thisuri, type = 'PUT') return def deleteElements(self, *elements): """Delete elements""" if not elements: return specs = {} for i in elements: i.delete() specs[i['RequestName']] = None answer = elements[0]._couch.commit() result, failures = formatReply(answer, *elements) msg = 'Couch error deleting element: "%s", error "%s", reason "%s"' for failed in failures: # only count delete as failed if document still exists if elements[0]._couch.documentExists(failed['id']): self.logger.error(msg % (failed['id'], failed['error'], failed['reason'])) # delete specs if no longer used for wf in specs: try: if not self.db.loadView('WorkQueue', 'elementsByWorkflow', {'key' : wf, 'limit' : 1, 'reduce' : False})['rows']: self.db.delete_doc(wf) except CouchNotFoundError: pass def availableWork(self, thresholds, siteJobCounts, teams = None, wfs = None): """ Get work which is available to be run Assume thresholds is a dictionary; keys are the site name, values are the maximum number of running jobs at that site. Assumes site_job_counts is a dictionary-of-dictionaries; keys are the site name and task priorities. The value is the number of jobs running at that priority. 
""" self.logger.info("Getting available work from %s/%s" % (sanitizeURL(self.server.url)['url'], self.db.name)) elements = [] # We used to pre-filter sites, looking to see if there are idle job slots # We don't do this anymore, as we may over-allocate # jobs to sites if the new jobs have a higher priority. # If there are no sites, punt early. if not thresholds: self.logger.error("No thresholds is set: Please check") return elements, thresholds, siteJobCounts options = {} options['include_docs'] = True options['descending'] = True options['resources'] = thresholds if teams: options['teams'] = teams self.logger.info("setting teams %s" % teams) if wfs: result = [] for i in xrange(0, len(wfs), 20): options['wfs'] = wfs[i:i+20] data = self.db.loadList('WorkQueue', 'workRestrictions', 'availableByPriority', options) result.extend(json.loads(data)) # sort final list result.sort(key = lambda x: x['WMCore.WorkQueue.DataStructs.WorkQueueElement.WorkQueueElement']['Priority']) else: result = self.db.loadList('WorkQueue', 'workRestrictions', 'availableByPriority', options) result = json.loads(result) if len(result) == 0: self.logger.info("""No available work in WQ or didn't pass workqueue restriction - check Pileup, site white list, etc""") self.logger.debug("Available Work:\n %s \n for resources\n %s" % (result, thresholds)) # Iterate through the results; apply whitelist / blacklist / data # locality restrictions. Only assign jobs if they are high enough # priority. 
for i in result: element = CouchWorkQueueElement.fromDocument(self.db, i) prio = element['Priority'] possibleSite = None sites = thresholds.keys() random.shuffle(sites) for site in sites: if element.passesSiteRestriction(site): # Count the number of jobs currently running of greater priority prio = element['Priority'] curJobCount = sum(map(lambda x : x[1] if x[0] >= prio else 0, siteJobCounts.get(site, {}).items())) self.logger.debug("Job Count: %s, site: %s threshods: %s" % (curJobCount, site, thresholds[site])) if curJobCount < thresholds[site]: possibleSite = site break if possibleSite: self.logger.debug("Possible site exists %s" % str(possibleSite)) elements.append(element) if site not in siteJobCounts: siteJobCounts[site] = {} siteJobCounts[site][prio] = siteJobCounts[site].setdefault(prio, 0) + element['Jobs']*element.get('blowupFactor', 1.0) else: self.logger.info("No possible site for %s" % element['RequestName']) # sort elements to get them in priority first and timestamp order elements.sort(key=lambda element: element['CreationTime']) elements.sort(key = lambda x: x['Priority'], reverse = True) return elements, thresholds, siteJobCounts def getActiveData(self): """Get data items we have work in the queue for""" data = self.db.loadView('WorkQueue', 'activeData', {'reduce' : True, 'group' : True}) return [{'dbs_url' : x['key'][0], 'name' : x['key'][1]} for x in data.get('rows', [])] def getActiveParentData(self): """Get data items we have work in the queue for with parent""" data = self.db.loadView('WorkQueue', 'activeParentData', {'reduce' : True, 'group' : True}) return [{'dbs_url' : x['key'][0], 'name' : x['key'][1]} for x in data.get('rows', [])] def getActivePileupData(self): """Get data items we have work in the queue for with pileup""" data = self.db.loadView('WorkQueue', 'activePileupData', {'reduce' : True, 'group' : True}) return [{'dbs_url' : x['key'][0], 'name' : x['key'][1]} for x in data.get('rows', [])] def getElementsForData(self, dbs, 
data): """Get active elements for this dbs & data combo""" elements = self.db.loadView('WorkQueue', 'elementsByData', {'key' : data, 'include_docs' : True}) return [CouchWorkQueueElement.fromDocument(self.db, x['doc']) for x in elements.get('rows', [])] def getElementsForParentData(self, data): """Get active elements for this data """ elements = self.db.loadView('WorkQueue', 'elementsByParentData', {'key' : data, 'include_docs' : True}) return [CouchWorkQueueElement.fromDocument(self.db, x['doc']) for x in elements.get('rows', [])] def getElementsForPileupData(self, data): """Get active elements for this data """ elements = self.db.loadView('WorkQueue', 'elementsByPileupData', {'key' : data, 'include_docs' : True}) return [CouchWorkQueueElement.fromDocument(self.db, x['doc']) for x in elements.get('rows', [])] def isAvailable(self): """Is the server available, i.e. up and not compacting""" try: compacting = self.db.info()['compact_running'] if compacting: self.logger.info("CouchDB compacting - try again later.") return False except Exception as ex: self.logger.error("CouchDB unavailable: %s" % str(ex)) return False return True def getWorkflows(self, includeInbox = False, includeSpecs = False): """Returns workflows known to workqueue""" result = set([x['key'] for x in self.db.loadView('WorkQueue', 'elementsByWorkflow', {'group' : True})['rows']]) if includeInbox: result = result | set([x['key'] for x in self.inbox.loadView('WorkQueue', 'elementsByWorkflow', {'group' : True})['rows']]) if includeSpecs: result = result | set([x['key'] for x in self.db.loadView('WorkQueue', 'specsByWorkflow')['rows']]) return list(result) def queueLength(self): """Return number of available elements""" return self.db.loadView('WorkQueue', 'availableByPriority', {'limit' : 0})['total_rows'] def fixConflicts(self): """Fix elements in conflict Each local queue runs this to resolve its conflicts with global, resolution propagates up to global. 
Conflicting elements are merged into one element with others deleted. This will fail if elements are modified during the resolution - if this happens rerun. """ for db in [self.inbox, self.db]: for row in db.loadView('WorkQueue', 'conflicts')['rows']: element_id = row['id'] try: conflicting_elements = [CouchWorkQueueElement.fromDocument(db, db.document(element_id, rev)) \ for rev in row['value']] fixed_elements = fixElementConflicts(*conflicting_elements) if self.saveElements(fixed_elements[0]): self.saveElements(*fixed_elements[1:]) # delete others (if merged value update accepted) except Exception as ex: self.logger.error("Error resolving conflict for %s: %s" % (element_id, str(ex))) def recordTaskActivity(self, taskname, comment = ''): """Record a task for monitoring""" try: record = self.db.document('task_activity') except CouchNotFoundError: record = Document('task_activity') record.setdefault('tasks', {}) record['tasks'].setdefault(taskname, {}) record['tasks'][taskname]['timestamp'] = time.time() record['tasks'][taskname]['comment'] = comment try: self.db.commitOne(record) except Exception as ex: self.logger.error("Unable to update task %s freshness: %s" % (taskname, str(ex))) def getWMBSInjectStatus(self, request = None): """ This service only provided by global queue """ options = {'group' : True} if request: options.update(key = request) data = self.db.loadView('WorkQueue', 'wmbsInjectStatusByRequest', options) if request: if data['rows']: injectionStatus = data['rows'][0]['value'] inboxElement = self.getInboxElements(elementIDs = [data['rows'][0]['key']]) return injectionStatus and not inboxElement[0].get('OpenForNewData', False) else: raise WorkQueueNoMatchingElements("%s not found" % request) else: injectionStatus = dict((x['key'], x['value']) for x in data.get('rows', [])) inboxElements = self.getInboxElements(elementIDs = injectionStatus.keys()) finalInjectionStatus = [] for element in inboxElements: if not element.get('OpenForNewData', False) and 
injectionStatus[element._id]: finalInjectionStatus.append({element._id : True}) else: finalInjectionStatus.append({element._id : False}) return finalInjectionStatus def getWorkflowNames(self, inboxFlag = False): """Get workflow names from workqueue db""" if inboxFlag: db = self.inbox else: db = self.db data = db.loadView('WorkQueue', 'elementsByWorkflow', {'stale': "update_after", 'reduce' : True, 'group' : True}) return [x['key'] for x in data.get('rows', [])] def deleteWQElementsByWorkflow(self, workflowNames): """ delete workqueue elements belongs to given workflow names it doen't check the status of workflow so need to be careful to use this. Pass only workflows which has the end status """ deleted = 0 dbs = [self.db, self.inbox] if not isinstance(workflowNames, list): workflowNames = [workflowNames] if len(workflowNames) == 0: return deleted options = {} options["stale"] = "update_after" options["reduce"] = False for couchdb in dbs: result = couchdb.loadView("WorkQueue", "elementsByWorkflow", options, workflowNames) ids = [] for entry in result["rows"]: ids.append(entry["id"]) if ids: couchdb.bulkDeleteByIDs(ids) deleted += len(ids) # delete the workflow with spec from workqueue db for wf in workflowNames: self.db.delete_doc(wf) return deleted
class CMSCouchTest(unittest.TestCase):
    """
    Integration tests for the CouchServer / Database wrappers.

    Every test talks to a live CouchDB server (URL taken from the COUCHURL
    environment variable, with a localhost fallback in setUp) and works in
    its own database named after the test method.
    """
    test_counter = 0

    def setUp(self):
        """Connect to the server and create an empty database for this test."""
        # Make an instance of the server
        self.server = CouchServer(os.getenv("COUCHURL", 'http://*****:*****@localhost:5984'))
        self.testname = self.id().split('.')[-1]
        # Create a database, drop an existing one first
        dbname = 'cmscouch_unittest_%s' % self.testname.lower()

        if dbname in self.server.listDatabases():
            self.server.deleteDatabase(dbname)

        self.server.createDatabase(dbname)
        self.db = self.server.connectDatabase(dbname)

    def tearDown(self):
        """Drop the per-test database, but only when no exception is pending."""
        if sys.exc_info()[0] is None:
            # This test has passed, clean up after it
            dbname = 'cmscouch_unittest_%s' % self.testname.lower()
            self.server.deleteDatabase(dbname)

    def testCommitOne(self):
        """commitOne accepts both a plain dict and a Document."""
        # Can I commit one dict
        doc = {'foo': 123, 'bar': 456}
        self.assertTrue(self.db.commitOne(doc)[0]['id'])
        # What about a Document
        doc = Document(inputDict=doc)
        self.assertTrue(self.db.commitOne(doc)[0]['id'])

    def testCommitOneWithQueue(self):
        """
        CommitOne bypasses the queue, but it should maintain the queue if
        present for a future call to commit.
        """
        # Queue up five docs
        doc = {'foo': 123, 'bar': 456}
        for _ in range(1, 6):
            self.db.queue(doc)
        # Commit one Document
        doc = Document(inputDict=doc)
        self.assertTrue(self.db.commitOne(doc)[0]['id'])
        # only the directly committed document is stored so far
        self.assertEqual(1, len(self.db.allDocs()['rows']))
        self.db.commit()
        # the five queued documents were kept and are now stored as well
        self.assertEqual(6, len(self.db.allDocs()['rows']))

    def testTimeStamping(self):
        """commitOne(timestamp=True) stores a 'timestamp' field."""
        doc = {'foo': 123, 'bar': 456}
        doc_id = self.db.commitOne(doc, timestamp=True)[0]['id']
        doc = self.db.document(doc_id)
        self.assertTrue('timestamp' in doc.keys())

    def testDeleteDoc(self):
        """delete_doc removes a document immediately."""
        doc = {'foo': 123, 'bar': 456}
        self.db.commitOne(doc)
        all_docs = self.db.allDocs()
        self.assertEqual(1, len(all_docs['rows']))

        # The db.delete_doc is immediate
        doc_id = all_docs['rows'][0]['id']
        self.db.delete_doc(doc_id)
        all_docs = self.db.allDocs()
        self.assertEqual(0, len(all_docs['rows']))

    def testDeleteQueuedDocs(self):
        """queueDelete only takes effect on the next commit."""
        doc1 = {'foo': 123, 'bar': 456}
        doc2 = {'foo': 789, 'bar': 101112}
        self.db.queue(doc1)
        self.db.queue(doc2)
        self.db.commit()

        all_docs = self.db.allDocs()
        self.assertEqual(2, len(all_docs['rows']))
        for res in all_docs['rows']:
            doc = self.db.document(res['id'])
            self.db.queueDelete(doc)
        # deletions are queued, nothing has been removed yet
        all_docs = self.db.allDocs()
        self.assertEqual(2, len(all_docs['rows']))

        self.db.commit()

        all_docs = self.db.allDocs()
        self.assertEqual(0, len(all_docs['rows']))

    def testReplicate(self):
        """A document replicated to a second database is identical to the source."""
        repl_db = self.server.connectDatabase(self.db.name + 'repl')

        doc_id = self.db.commitOne({'foo': 123}, timestamp=True)[0]['id']

        # replicate
        self.server.replicate(self.db.name, repl_db.name)
        # wait for a few seconds to replication to be triggered.
        time.sleep(1)
        self.assertEqual(self.db.document(doc_id), repl_db.document(doc_id))
        self.server.deleteDatabase(repl_db.name)

    def testSlashInDBName(self):
        """
        Slashes are a valid character in a database name, and are useful as it
        creates a directory structure for the couch data files.
        """
        db_name = 'wmcore/unittests'
        try:
            self.server.deleteDatabase(db_name)
        except CouchNotFoundError:
            # Ignore this - the database shouldn't already exist
            pass

        db = self.server.createDatabase(db_name)
        info = db.info()
        self.assertEqual(info['db_name'], db_name)

        db_name = 'wmcore/unittests'
        db = self.server.connectDatabase(db_name)
        info = db.info()
        self.assertEqual(info['db_name'], db_name)

        db = Database(db_name, url=os.environ["COUCHURL"])
        info = db.info()
        self.assertEqual(info['db_name'], db_name)

        self.server.deleteDatabase(db_name)

    def testInvalidName(self):
        """
        Capital letters are not allowed in database names.
        """
        db_name = 'Not A Valid Name'
        self.assertRaises(ValueError, self.server.createDatabase, db_name)
        self.assertRaises(ValueError, self.server.deleteDatabase, db_name)
        self.assertRaises(ValueError, self.server.connectDatabase, db_name)
        self.assertRaises(ValueError, Database, db_name)

    def testDocumentSerialisation(self):
        """
        A document should be writable into the couchdb with a timestamp.
        """
        d = Document()
        d['foo'] = 'bar'
        doc_info = self.db.commit(doc=d, timestamp=True)[0]
        d_from_db = self.db.document(doc_info['id'])
        self.assertEqual(d['foo'], d_from_db['foo'])
        self.assertEqual(d['timestamp'], d_from_db['timestamp'])

    def testAttachments(self):
        """
        Test uploading attachments with and without checksumming
        """
        doc = self.db.commitOne({'foo': 'bar'}, timestamp=True)[0]
        attachment1 = "Hello"
        attachment2 = "How are you today?"
        attachment3 = "I'm very well, thanks for asking"
        attachment4 = "Lovely weather we're having"
        attachment5 = "Goodbye"
        # base64-encoded MD5 digest of attachment5, passed as its checksum
        keyhash = hashlib.md5()
        keyhash.update(attachment5)
        attachment5_md5 = base64.b64encode(keyhash.digest())
        attachment6 = "Good day to you, sir!"
        # TODO: add a binary attachment - e.g. tar.gz
        doc = self.db.addAttachment(doc['id'], doc['rev'], attachment1)
        doc = self.db.addAttachment(doc['id'], doc['rev'], attachment2, contentType="foo/bar")
        doc = self.db.addAttachment(doc['id'], doc['rev'], attachment3, name="my_greeting")
        doc = self.db.addAttachment(doc['id'], doc['rev'], attachment4, add_checksum=True)
        doc = self.db.addAttachment(doc['id'], doc['rev'], attachment5, checksum=attachment5_md5)

        # a checksum that does not match the payload must be rejected
        self.assertRaises(CouchInternalServerError, self.db.addAttachment,
                          doc['id'], doc['rev'], attachment6, checksum='123')

    def testRevisionHandling(self):
        """Create a replication conflict, resolve it, and check old revisions."""
        # This test won't work from an existing database, conflicts will be preserved, so
        # ruthlessly remove the databases to get a clean slate.
        try:
            self.server.deleteDatabase(self.db.name)
        except CouchNotFoundError:
            pass  # Must have been deleted already

        try:
            self.server.deleteDatabase(self.db.name + 'repl')
        except CouchNotFoundError:
            pass  # Must have been deleted already

        # I'm going to create a conflict, so need a replica db
        self.db = self.server.connectDatabase(self.db.name)
        repl_db = self.server.connectDatabase(self.db.name + 'repl')

        doc_id = self.db.commitOne({'foo': 123}, timestamp=True)[0]['id']
        doc_v1 = self.db.document(doc_id)

        # replicate
        self.server.replicate(self.db.name, repl_db.name)
        time.sleep(1)

        doc_v2 = self.db.document(doc_id)
        doc_v2['bar'] = 456
        self.db.commitOne(doc_v2)
        doc_v2 = self.db.document(doc_id)

        # now update the replica
        conflict_doc = repl_db.document(doc_id)
        conflict_doc['bar'] = 101112
        repl_db.commitOne(conflict_doc)

        # replicate, creating the conflict
        self.server.replicate(self.db.name, repl_db.name)
        time.sleep(1)

        conflict_view = {'map': "function(doc) {if(doc._conflicts) {emit(doc._conflicts, null);}}"}
        data = repl_db.post('/%s/_temp_view' % repl_db.name, conflict_view)

        # Should have one conflict in the repl database
        self.assertEqual(data['total_rows'], 1)
        # Should have no conflicts in the source database
        self.assertEqual(self.db.post('/%s/_temp_view' % self.db.name, conflict_view)['total_rows'], 0)
        self.assertTrue(repl_db.documentExists(data['rows'][0]['id'], rev=data['rows'][0]['key'][0]))

        # deleting the conflicting revision resolves the conflict
        repl_db.delete_doc(data['rows'][0]['id'], rev=data['rows'][0]['key'][0])
        data = repl_db.post('/%s/_temp_view' % repl_db.name, conflict_view)

        self.assertEqual(data['total_rows'], 0)
        self.server.deleteDatabase(repl_db.name)

        # update it again
        doc_v3 = self.db.document(doc_id)
        doc_v3['baz'] = 789
        self.db.commitOne(doc_v3)
        doc_v3 = self.db.document(doc_id)

        # test that I can pull out an old revision
        doc_v1_test = self.db.document(doc_id, rev=doc_v1['_rev'])
        self.assertEqual(doc_v1, doc_v1_test)

        # test that I can check a revision exists
        self.assertTrue(self.db.documentExists(doc_id, rev=doc_v2['_rev']))
        self.assertFalse(self.db.documentExists(doc_id, rev='1' + doc_v2['_rev']))

        # why you shouldn't rely on rev
        self.db.compact(blocking=True)
        self.assertFalse(self.db.documentExists(doc_id, rev=doc_v1['_rev']))
        self.assertFalse(self.db.documentExists(doc_id, rev=doc_v2['_rev']))
        self.assertTrue(self.db.documentExists(doc_id, rev=doc_v3['_rev']))

    def testCommit(self):
        """
        Test queue and commit modes
        """
        # try to commit 2 random docs
        doc = {'foo': 123, 'bar': 456}
        self.db.queue(doc)
        self.db.queue(doc)
        self.assertEqual(2, len(self.db.commit()))

        # committing 2 docs with the same id will fail
        self.db.queue(Document(id="1", inputDict={'foo': 123, 'bar': 456}))
        self.db.queue(Document(id="1", inputDict={'foo': 1234, 'bar': 456}))
        answer = self.db.commit()
        self.assertEqual(2, len(answer))
        self.assertEqual(answer[0]['ok'], True)
        self.assertEqual(answer[1]['error'], 'conflict')

        # all_or_nothing mode ignores conflicts
        self.db.queue(Document(id="2", inputDict=doc))
        self.db.queue(Document(id="2", inputDict={'foo': 1234, 'bar': 456}))
        answer = self.db.commit(all_or_nothing=True)
        self.assertEqual(2, len(answer))
        # neither of the two results may report an error
        self.assertEqual(answer[0].get('error'), None)
        self.assertEqual(answer[1].get('error'), None)
        self.assertEqual(answer[0]['id'], '2')
        self.assertEqual(answer[1]['id'], '2')

        # callbacks can do stuff when conflicts arise
        # this particular one just overwrites the document
        def callback(db, data, result):
            for queued in data['docs']:
                if queued['_id'] == result['id']:
                    queued['_rev'] = db.document(queued['_id'])['_rev']
                    retval = db.commitOne(queued)
            return retval[0]

        self.db.queue(Document(id="2", inputDict={'foo': 5, 'bar': 6}))
        answer = self.db.commit(callback=callback)
        self.assertEqual(1, len(answer))
        self.assertEqual(answer[0].get('error'), None)
        updatedDoc = self.db.document('2')
        self.assertEqual(updatedDoc['foo'], 5)
        self.assertEqual(updatedDoc['bar'], 6)

    def testUpdateHandler(self):
        """
        Test that update function support works
        """
        update_ddoc = {
            '_id': '_design/foo',
            'language': 'javascript',
            'updates': {
                "bump-counter": 'function(doc, req) {if (!doc.counter) {doc.counter = 0};doc.counter += 1;return [doc,"bumped it!"];}',
            }
        }
        self.db.commit(update_ddoc)
        doc = {'foo': 123, 'counter': 0}
        doc_id = self.db.commit(doc)[0]['id']
        self.assertEqual("bumped it!", self.db.updateDocument(doc_id, 'foo', 'bump-counter'))

        self.assertEqual(1, self.db.document(doc_id)['counter'])

    def testList(self):
        """
        Test list function works ok
        """
        update_ddoc = {
            '_id': '_design/foo',
            'language': 'javascript',
            'views': {
                'all': {
                    'map': 'function(doc) {emit(null, null) }'
                },
            },
            'lists': {
                'errorinoutput': 'function(doc, req) {send("A string with the word error in")}',
                'malformed': 'function(doc, req) {somethingtoraiseanerror}',
            }
        }
        self.db.commit(update_ddoc)
        # appropriate errors raised
        self.assertRaises(CouchNotFoundError, self.db.loadList, 'foo', 'error', 'view_doesnt_exist')
        self.assertRaises(CouchInternalServerError, self.db.loadList, 'foo', 'malformed', 'all')
        # error in list output string shouldn't raise an error
        self.assertEqual(self.db.loadList('foo', 'errorinoutput', 'all'),
                         "A string with the word error in")

    def testAllDocs(self):
        """
        Test AllDocs with options
        """
        self.db.queue(Document(id="1", inputDict={'foo': 123, 'bar': 456}))
        self.db.queue(Document(id="2", inputDict={'foo': 123, 'bar': 456}))
        self.db.queue(Document(id="3", inputDict={'foo': 123, 'bar': 456}))

        self.db.commit()
        self.assertEqual(3, len(self.db.allDocs()['rows']))
        self.assertEqual(2, len(self.db.allDocs({'startkey': "2"})['rows']))
        self.assertEqual(2, len(self.db.allDocs(keys=["1", "3"])['rows']))
        self.assertEqual(1, len(self.db.allDocs({'limit': 1}, ["1", "3"])['rows']))
        # a key that does not exist is reported as an error row
        self.assertTrue('error' in self.db.allDocs(keys=["1", "4"])['rows'][1])

    def testUpdateBulkDocuments(self):
        """
        Test bulk update of several documents with conflict handling
        """
        self.db.queue(Document(id="1", inputDict={'foo': 123, 'bar': 456}))
        self.db.queue(Document(id="2", inputDict={'foo': 123, 'bar': 456}))
        self.db.queue(Document(id="3", inputDict={'foo': 123, 'bar': 456}))
        self.db.commit()

        self.db.updateBulkDocumentsWithConflictHandle(["1", "2", "3"], {'foo': 333}, 2)
        result = self.db.allDocs({"include_docs": True})['rows']
        self.assertEqual(3, len(result))
        for item in result:
            self.assertEqual(333, item['doc']['foo'])

        self.db.updateBulkDocumentsWithConflictHandle(["1", "2", "3"], {'foo': 222}, 10)
        result = self.db.allDocs({"include_docs": True})['rows']
        self.assertEqual(3, len(result))
        for item in result:
            self.assertEqual(222, item['doc']['foo'])

    def testUpdateHandlerAndBulkUpdateProfile(self):
        """
        Time per-document update-handler calls against one bulk update.

        Nothing is asserted; the two timings are printed for comparison.
        """
        # for actual test increase the size value: For 10000 records, 96 sec vs 4 sec
        size = 100
        for i in range(size):
            self.db.queue(Document(id="%s" % i, inputDict={'name': 123, 'counter': 0}))

        update_doc = {
            '_id': '_design/foo',
            'language': 'javascript',
            'updates': {
                # copy every field of the JSON request body onto the document
                # (bracket access: "doc.field" would set a property literally
                # named "field" instead of the looped-over key)
                "change-counter": """function(doc, req) { if (doc) { var data = JSON.parse(req.body);
                                      for (var field in data) {doc[field] = data[field];} return [doc, 'OK'];}}""",
            }
        }

        self.db.commit(update_doc)
        start = time.time()
        for i in range(size):
            doc_id = "%s" % i
            self.db.updateDocument(doc_id, 'foo', 'change-counter', {'counter': 1}, useBody=True)
        end = time.time()

        print("update handler: %s sec" % (end - start))

        start = time.time()
        ids = ["%s" % i for i in range(size)]
        self.db.updateBulkDocumentsWithConflictHandle(ids, {'counter': 2}, 1000)
        end = time.time()

        print("bulk update: %s sec" % (end - start))
class WMStatsWriter(WMStatsReader):
    """
    Write-side helper for the WMStats couch database.

    Wraps a couch database connection and exposes the 'WMStats' design
    document update handlers used to insert and update request documents.
    """

    def __init__(self, couchURL, dbName = None):
        """
        Open the connection to the WMStats database.

        :param couchURL: couch server URL; when dbName is not given it must
            also contain the database name and is split by
            splitCouchServiceURL
        :param dbName: database name, or None to derive it from couchURL
        """
        # NOTE(review): the parent constructor is not called and couchURL is
        # used as given - presumably so credentials in the URL are kept for
        # writing; confirm before changing.
        # set the connection for local couchDB call
        if dbName:
            self.couchURL = couchURL
            self.dbName = dbName
        else:
            self.couchURL, self.dbName = splitCouchServiceURL(couchURL)
        self.couchServer = CouchServer(self.couchURL)
        self.couchDB = self.couchServer.connectDatabase(self.dbName, False)

    def uploadData(self, docs):
        """
        upload to given couchURL using cert and key authentication and authorization

        :param docs: a single document (dict or dict subclass) or an
            iterable of documents
        :return: the result of the bulk commit (called with returndocs=True)
        """
        # add delete docs as well for the compaction
        # need to check whether delete and update is successful
        # isinstance (not an exact type check) so that a single dict subclass
        # is wrapped as well instead of being iterated key by key
        if isinstance(docs, dict):
            docs = [docs]
        for doc in docs:
            self.couchDB.queue(doc)
        return self.couchDB.commit(returndocs = True)

    def insertRequest(self, schema):
        """Build a monitoring document from a request schema and insert it."""
        doc = monitorDocFromRequestSchema(schema)
        return self.insertGenericRequest(doc)

    def insertGenericRequest(self, doc):
        """
        Insert a request document and set its status to "new".

        :param doc: request document; doc['_id'] is the request name
        :return: the result of the 'insertRequest' update handler
        """
        result = self.couchDB.updateDocument(doc['_id'], 'WMStats', 'insertRequest',
                                             fields={'doc': JSONEncoder().encode(doc)})
        self.updateRequestStatus(doc['_id'], "new")
        return result

    def updateRequestStatus(self, request, status):
        """Record a status for the request, stamped with the current time."""
        statusTime = {'status': status, 'update_time': int(time.time())}
        return self.couchDB.updateDocument(request, 'WMStats', 'requestStatus',
                                           fields={'request_status': JSONEncoder().encode(statusTime)})

    def updateTeam(self, request, team):
        """Set the team of the given request."""
        return self.couchDB.updateDocument(request, 'WMStats', 'team',
                                           fields={'team': team})

    def insertTotalStats(self, request, totalStats):
        """
        update the total stats of given workflow
        (total_jobs, input_events, input_lumis, input_num_files)
        """
        return self.couchDB.updateDocument(request, 'WMStats', 'totalStats',
                                           fields=totalStats)

    def updateFromWMSpec(self, spec):
        """Update the priority and site white list of a request from its spec."""
        # currently only update priority and siteWhitelist
        # complex field needs to be JSON encoded
        # assuming all the toplevel tasks has the same site white lists
        # priority is priority + user priority + group priority
        fields = {'priority': spec.priority(),
                  'site_white_list': spec.getTopLevelTask()[0].siteWhitelist()}
        return self.couchDB.updateDocument(spec.name(), 'WMStats', 'generalFields',
                                           fields={'general_fields': JSONEncoder().encode(fields)})

    def updateRequestsInfo(self, docs):
        """
        bulk update for request documents.
        TODO: change to bulk update handler when it gets supported

        Each document is sent without its 'type' field; doc['workflow'] names
        the request to update. The caller's documents are left untouched and
        a document without a 'type' key is accepted.
        """
        for doc in docs:
            # work on a filtered copy instead of deleting from the caller's doc
            fields = dict((key, value) for key, value in doc.items() if key != 'type')
            self.couchDB.updateDocument(doc['workflow'], 'WMStats', 'generalFields',
                                        fields={'general_fields': JSONEncoder().encode(fields)})

    def updateAgentInfo(self, agentInfo):
        """Store the agent information document under agentInfo['_id']."""
        return self.couchDB.updateDocument(agentInfo['_id'], 'WMStats', 'agentInfo',
                                           fields={'agent_info': JSONEncoder().encode(agentInfo)})

    def deleteOldDocs(self, days):
        """
        delete the documents from wmstats db older than param 'days'

        :return: {'delete': <number deleted>, 'error': {<error>: <count>}},
            or the string "nothing" when the commit returned no results
        """
        sec = int(days * 24 * 60 * 60)
        threshold = int(time.time()) - sec
        # descending from the threshold, i.e. everything at or before it
        options = {"startkey": threshold, "descending": True,
                   "stale": "update_after"}
        result = self.couchDB.loadView("WMStats", "time", options)

        for row in result['rows']:
            self.couchDB.queueDelete({'_id': row['value']['id'],
                                      '_rev': row['value']['rev']})
        committed = self.couchDB.commit()

        if not committed:
            return "nothing"

        # count successful deletions and group failures by error name
        errorReport = {}
        deleted = 0
        for data in committed:
            if 'error' in data:
                errorReport[data['error']] = errorReport.get(data['error'], 0) + 1
            else:
                deleted += 1
        return {'delete': deleted, 'error': errorReport}

    def replicate(self, target):
        """Start a continuous, filtered replication of this database to target."""
        return self.couchServer.replicate(self.dbName, target, continuous = True,
                                          filter = 'WMStats/repfilter',
                                          useReplicator = True)

    def getDBInstance(self):
        """Return the underlying couch database connection."""
        return self.couchDB
# Destination couch host and database base name come from the command line.
# NOTE(review): srcCouchHost / srcDbBase are assigned earlier in this script
# (outside this excerpt) - confirm their argv indices do not collide with these.
destCouchHost = sys.argv[1]
destDbBase = sys.argv[2]

destCouchServer = CouchServer(dburl = destCouchHost)
srcCouchServer = CouchServer(dburl = srcCouchHost)

# Full database URLs; "%2F" is the URL-encoded "/" inside a couch database name
srcJobsDb = srcCouchHost + "/" + srcDbBase + "%2Fjobs"
destJobsDb = destCouchHost + "/" + destDbBase + "%2Fjobs"
srcFwjrsDb = srcCouchHost + "/" + srcDbBase + "%2Ffwjrs"
destFwjrsDb = destCouchHost + "/" + destDbBase + "%2Ffwjrs"

# Single-argument print(...) prints the same text under Python 2 and is also
# valid Python 3 syntax.
print("Archiving %s/%s to %s/%s..." % (srcCouchHost, srcDbBase, destCouchHost, destDbBase))

# Replicate the FWJR and Jobs databases...
print("  Replicating jobs database...")
destCouchServer.replicate(srcJobsDb, destJobsDb, create_target = True)
print("  Replication fwjrs database...")
destCouchServer.replicate(srcFwjrsDb, destFwjrsDb, create_target = True)

# Generate views for the various databases
# (the names are rebound from URL strings to database connections here)
destJobsDb = destCouchServer.connectDatabase(destDbBase + "/jobs")
destFwjrsDb = destCouchServer.connectDatabase(destDbBase + "/fwjrs")

# Loading a single row is enough to make couch build the view index
print("  Triggering view generation for jobs database...")
destJobsDb.loadView("JobDump", "statusByWorkflowName", options = {"limit": 1})
print("  Triggering view generation for fwjrs database...")
destFwjrsDb.loadView("FWJRDump", "outputByWorkflowName", options = {"limit": 1})
print("")

# Query destination DB for list of workflows
summaryBase = "%s/%s%%2Ffwjrs/_design/FWJRDump/_show/workflowSummary/%s" # dest host, dest db base, workflow name
successBase = "%s/%s%%2Fjobs/_design/JobDump/_list/successJobs/statusByWorkflowName?startkey=%%5B%%22%s%%22%%5D&endkey=%%5B%%22%s%%22%%2C%%7B%%7D%%5D&reduce=false" # dest host, dest db base, workflow, workflow
class CMSCouchTest(unittest.TestCase): test_counter = 0 def setUp(self): # Make an instance of the server self.server = CouchServer( os.getenv("COUCHURL", 'http://*****:*****@localhost:5984')) self.testname = self.id().split('.')[-1] # Create a database, drop an existing one first dbname = 'cmscouch_unittest_%s' % self.testname.lower() if dbname in self.server.listDatabases(): self.server.deleteDatabase(dbname) self.server.createDatabase(dbname) self.db = self.server.connectDatabase(dbname) def tearDown(self): if sys.exc_info()[0] == None: # This test has passed, clean up after it dbname = 'cmscouch_unittest_%s' % self.testname.lower() self.server.deleteDatabase(dbname) def testCommitOne(self): # Can I commit one dict doc = {'foo': 123, 'bar': 456} id = self.db.commitOne(doc, returndocs=True)[0]['id'] # What about a Document doc = Document(inputDict=doc) id = self.db.commitOne(doc, returndocs=True)[0]['id'] def testCommitOneWithQueue(self): """ CommitOne bypasses the queue, but it should maintain the queue if present for a future call to commit. 
""" # Queue up five docs doc = {'foo': 123, 'bar': 456} for i in range(1, 6): self.db.queue(doc) # Commit one Document doc = Document(inputDict=doc) id = self.db.commitOne(doc, returndocs=True)[0]['id'] self.assertEqual(1, len(self.db.allDocs()['rows'])) self.db.commit() self.assertEqual(6, len(self.db.allDocs()['rows'])) def testTimeStamping(self): doc = {'foo': 123, 'bar': 456} id = self.db.commitOne(doc, timestamp=True, returndocs=True)[0]['id'] doc = self.db.document(id) self.assertTrue('timestamp' in doc.keys()) def testDeleteDoc(self): doc = {'foo': 123, 'bar': 456} self.db.commitOne(doc) all_docs = self.db.allDocs() self.assertEqual(1, len(all_docs['rows'])) # The db.delete_doc is immediate id = all_docs['rows'][0]['id'] self.db.delete_doc(id) all_docs = self.db.allDocs() self.assertEqual(0, len(all_docs['rows'])) def testDeleteQueuedDocs(self): doc1 = {'foo': 123, 'bar': 456} doc2 = {'foo': 789, 'bar': 101112} self.db.queue(doc1) self.db.queue(doc2) self.db.commit() all_docs = self.db.allDocs() self.assertEqual(2, len(all_docs['rows'])) for res in all_docs['rows']: id = res['id'] doc = self.db.document(id) self.db.queueDelete(doc) all_docs = self.db.allDocs() self.assertEqual(2, len(all_docs['rows'])) self.db.commit() all_docs = self.db.allDocs() self.assertEqual(0, len(all_docs['rows'])) def testWriteReadDocNoID(self): doc = {} def testReplicate(self): repl_db = self.server.connectDatabase(self.db.name + 'repl') doc_id = self.db.commitOne({'foo': 123}, timestamp=True, returndocs=True)[0]['id'] doc_v1 = self.db.document(doc_id) #replicate self.server.replicate(self.db.name, repl_db.name) self.assertEqual(self.db.document(doc_id), repl_db.document(doc_id)) self.server.deleteDatabase(repl_db.name) def testSlashInDBName(self): """ Slashes are a valid character in a database name, and are useful as it creates a directory strucutre for the couch data files. 
""" db_name = 'wmcore/unittests' try: self.server.deleteDatabase(db_name) except: # Ignore this - the database shouldn't already exist pass db = self.server.createDatabase(db_name) info = db.info() assert info['db_name'] == db_name db_name = 'wmcore/unittests' db = self.server.connectDatabase(db_name) info = db.info() assert info['db_name'] == db_name db = Database(db_name, url=os.environ["COUCHURL"]) info = db.info() assert info['db_name'] == db_name self.server.deleteDatabase(db_name) def testInvalidName(self): """ Capitol letters are not allowed in database names. """ db_name = 'Not A Valid Name' self.assertRaises(ValueError, self.server.createDatabase, db_name) self.assertRaises(ValueError, self.server.deleteDatabase, db_name) self.assertRaises(ValueError, self.server.connectDatabase, db_name) self.assertRaises(ValueError, Database, db_name) def testDocumentSerialisation(self): """ A document should be writable into the couchdb with a timestamp. """ d = Document() d['foo'] = 'bar' doc_info = self.db.commit(doc=d, timestamp=True)[0] d_from_db = self.db.document(doc_info['id']) self.assertEqual(d['foo'], d_from_db['foo']) self.assertEqual(d['timestamp'], d_from_db['timestamp']) def testAttachments(self): """ Test uploading attachments with and without checksumming """ doc = self.db.commitOne({'foo': 'bar'}, timestamp=True, returndocs=True)[0] attachment1 = "Hello" attachment2 = "How are you today?" attachment3 = "I'm very well, thanks for asking" attachment4 = "Lovely weather we're having" attachment5 = "Goodbye" keyhash = hashlib.md5() keyhash.update(attachment5) attachment5_md5 = keyhash.digest() attachment5_md5 = base64.b64encode(attachment5_md5) attachment6 = "Good day to you, sir!" #TODO: add a binary attachment - e.g. 
tar.gz doc = self.db.addAttachment(doc['id'], doc['rev'], attachment1) doc = self.db.addAttachment(doc['id'], doc['rev'], attachment2, contentType="foo/bar") doc = self.db.addAttachment(doc['id'], doc['rev'], attachment3, name="my_greeting") doc = self.db.addAttachment(doc['id'], doc['rev'], attachment4, add_checksum=True) doc = self.db.addAttachment(doc['id'], doc['rev'], attachment5, checksum=attachment5_md5) self.assertRaises(CouchInternalServerError, self.db.addAttachment, doc['id'], doc['rev'], attachment6, checksum='123') def testRevisionHandling(self): # This test won't work from an existing database, conflicts will be preserved, so # ruthlessly remove the databases to get a clean slate. try: self.server.deleteDatabase(self.db.name) except CouchNotFoundError: pass # Must have been deleted already try: self.server.deleteDatabase(self.db.name + 'repl') except CouchNotFoundError: pass # Must have been deleted already # I'm going to create a conflict, so need a replica db self.db = self.server.connectDatabase(self.db.name) repl_db = self.server.connectDatabase(self.db.name + 'repl') doc_id = self.db.commitOne({'foo': 123}, timestamp=True, returndocs=True)[0]['id'] doc_v1 = self.db.document(doc_id) #replicate self.server.replicate(self.db.name, repl_db.name) doc_v2 = self.db.document(doc_id) doc_v2['bar'] = 456 doc_id_rev2 = self.db.commitOne(doc_v2, returndocs=True)[0] doc_v2 = self.db.document(doc_id) #now update the replica conflict_doc = repl_db.document(doc_id) conflict_doc['bar'] = 101112 repl_db.commitOne(conflict_doc) #replicate, creating the conflict self.server.replicate(self.db.name, repl_db.name) conflict_view = { 'map': "function(doc) {if(doc._conflicts) {emit(doc._conflicts, null);}}" } data = repl_db.post('/%s/_temp_view' % repl_db.name, conflict_view) # Should have one conflict in the repl database self.assertEqual(data['total_rows'], 1) # Should have no conflicts in the source database self.assertEqual( self.db.post('/%s/_temp_view' % 
self.db.name, conflict_view)['total_rows'], 0) self.assertTrue( repl_db.documentExists(data['rows'][0]['id'], rev=data['rows'][0]['key'][0])) repl_db.delete_doc(data['rows'][0]['id'], rev=data['rows'][0]['key'][0]) data = repl_db.post('/%s/_temp_view' % repl_db.name, conflict_view) self.assertEqual(data['total_rows'], 0) self.server.deleteDatabase(repl_db.name) #update it again doc_v3 = self.db.document(doc_id) doc_v3['baz'] = 789 doc_id_rev3 = self.db.commitOne(doc_v3, returndocs=True)[0] doc_v3 = self.db.document(doc_id) #test that I can pull out an old revision doc_v1_test = self.db.document(doc_id, rev=doc_v1['_rev']) self.assertEqual(doc_v1, doc_v1_test) #test that I can check a revision exists self.assertTrue(self.db.documentExists(doc_id, rev=doc_v2['_rev'])) self.assertFalse( self.db.documentExists(doc_id, rev='1' + doc_v2['_rev'])) #why you shouldn't rely on rev self.db.compact(blocking=True) self.assertFalse(self.db.documentExists(doc_id, rev=doc_v1['_rev'])) self.assertFalse(self.db.documentExists(doc_id, rev=doc_v2['_rev'])) self.assertTrue(self.db.documentExists(doc_id, rev=doc_v3['_rev'])) def testCommit(self): """ Test queue and commit modes """ # try to commit 2 random docs doc = {'foo': 123, 'bar': 456} self.db.queue(doc) self.db.queue(doc) self.assertEqual(2, len(self.db.commit())) # committing 2 docs with the same id will fail self.db.queue(Document(id="1", inputDict={'foo': 123, 'bar': 456})) self.db.queue(Document(id="1", inputDict={'foo': 1234, 'bar': 456})) answer = self.db.commit() self.assertEqual(2, len(answer)) self.assertEqual(answer[0]['error'], 'conflict') self.assertEqual(answer[1]['error'], 'conflict') # all_or_nothing mode ignores conflicts self.db.queue(Document(id="2", inputDict=doc)) self.db.queue(Document(id="2", inputDict={'foo': 1234, 'bar': 456})) answer = self.db.commit(all_or_nothing=True) self.assertEqual(2, len(answer)) self.assertEqual(answer[0].get('error'), None) self.assertEqual(answer[0].get('error'), None) 
self.assertEqual(answer[0]['id'], '2') self.assertEqual(answer[1]['id'], '2') # callbacks can do stuff when conflicts arise # this particular one just overwrites the document def callback(db, data, result): for doc in data['docs']: if doc['_id'] == result['id']: doc['_rev'] = db.document(doc['_id'])['_rev'] retval = db.commitOne(doc) return retval[0] self.db.queue(Document(id="2", inputDict={'foo': 5, 'bar': 6})) answer = self.db.commit(callback=callback) self.assertEqual(1, len(answer)) self.assertEqual(answer[0].get('error'), None) updatedDoc = self.db.document('2') self.assertEqual(updatedDoc['foo'], 5) self.assertEqual(updatedDoc['bar'], 6) return def testUpdateHandler(self): """ Test that update function support works """ update_ddoc = { '_id': '_design/foo', 'language': 'javascript', 'updates': { "bump-counter": 'function(doc, req) {if (!doc.counter) {doc.counter = 0};doc.counter += 1;return [doc,"bumped it!"];}', } } self.db.commit(update_ddoc) doc = {'foo': 123, 'counter': 0} doc_id = self.db.commit(doc)[0]['id'] self.assertEqual("bumped it!", self.db.updateDocument(doc_id, 'foo', 'bump-counter')) self.assertEqual(1, self.db.document(doc_id)['counter']) def testList(self): """ Test list function works ok """ update_ddoc = { '_id': '_design/foo', 'language': 'javascript', 'views': { 'all': { 'map': 'function(doc) {emit(null, null) }' }, }, 'lists': { 'errorinoutput': 'function(doc, req) {send("A string with the word error in")}', 'malformed': 'function(doc, req) {somethingtoraiseanerror}', } } self.db.commit(update_ddoc) # approriate errors raised self.assertRaises(CouchNotFoundError, self.db.loadList, 'foo', 'error', 'view_doesnt_exist') self.assertRaises(CouchInternalServerError, self.db.loadList, 'foo', 'malformed', 'all') # error in list output string shouldn't raise an error self.assertEqual(self.db.loadList('foo', 'errorinoutput', 'all'), "A string with the word error in") def testAllDocs(self): """ Test AllDocs with options """ 
self.db.queue(Document(id="1", inputDict={'foo': 123, 'bar': 456})) self.db.queue(Document(id="2", inputDict={'foo': 123, 'bar': 456})) self.db.queue(Document(id="3", inputDict={'foo': 123, 'bar': 456})) self.db.commit() self.assertEqual(3, len(self.db.allDocs()['rows'])) self.assertEqual(2, len(self.db.allDocs({'startkey': "2"})['rows'])) self.assertEqual(2, len(self.db.allDocs(keys=["1", "3"])['rows'])) self.assertEqual( 1, len(self.db.allDocs({'limit': 1}, ["1", "3"])['rows'])) self.assertTrue('error' in self.db.allDocs(keys=["1", "4"])['rows'][1])
class CMSCouchTest(unittest.TestCase):
    """
    Integration tests for the CouchServer / Database / Document wrappers.

    Every test runs against the CouchDB instance named by the COUCHURL
    environment variable (falling back to a localhost URL) and works on a
    database named after the running test method.
    """
    test_counter = 0

    def setUp(self):
        """
        Connect to the server and (re)create the per-test database.
        """
        # Make an instance of the server
        self.server = CouchServer(os.getenv("COUCHURL", 'http://*****:*****@localhost:5984'))
        self.testname = self.id().split('.')[-1]
        # Create a database, drop an existing one first
        dbname = 'cmscouch_unittest_%s' % self.testname.lower()
        if dbname in self.server.listDatabases():
            self.server.deleteDatabase(dbname)
        self.server.createDatabase(dbname)
        self.db = self.server.connectDatabase(dbname)

    def tearDown(self):
        """
        Drop the per-test database when no exception is being handled.
        """
        if sys.exc_info()[0] is None:
            # This test has passed, clean up after it
            dbname = 'cmscouch_unittest_%s' % self.testname.lower()
            self.server.deleteDatabase(dbname)

    def testCommitOne(self):
        """
        commitOne accepts both a plain dict and a Document.
        """
        # Can I commit one dict
        doc = {'foo': 123, 'bar': 456}
        doc_id = self.db.commitOne(doc, returndocs=True)[0]['id']
        self.assertTrue(doc_id)
        # What about a Document
        doc = Document(inputDict=doc)
        doc_id = self.db.commitOne(doc, returndocs=True)[0]['id']
        self.assertTrue(doc_id)

    def testCommitOneWithQueue(self):
        """
        CommitOne bypasses the queue, but it should maintain the queue if
        present for a future call to commit.
        """
        # Queue up five docs
        doc = {'foo': 123, 'bar': 456}
        for _ in range(5):
            self.db.queue(doc)
        # Commit one Document
        doc = Document(inputDict=doc)
        self.db.commitOne(doc, returndocs=True)
        self.assertEqual(1, len(self.db.allDocs()['rows']))
        # The five queued docs must still be pending and land now
        self.db.commit()
        self.assertEqual(6, len(self.db.allDocs()['rows']))

    def testTimeStamping(self):
        """
        Committing with timestamp=True stores a 'timestamp' key in the doc.
        """
        doc = {'foo': 123, 'bar': 456}
        doc_id = self.db.commitOne(doc, timestamp=True, returndocs=True)[0]['id']
        doc = self.db.document(doc_id)
        self.assertTrue('timestamp' in doc.keys())

    def testDeleteDoc(self):
        """
        delete_doc removes a document without needing a commit.
        """
        doc = {'foo': 123, 'bar': 456}
        self.db.commitOne(doc)
        all_docs = self.db.allDocs()
        self.assertEqual(1, len(all_docs['rows']))

        # The db.delete_doc is immediate
        doc_id = all_docs['rows'][0]['id']
        self.db.delete_doc(doc_id)
        all_docs = self.db.allDocs()
        self.assertEqual(0, len(all_docs['rows']))

    def testDeleteQueuedDocs(self):
        """
        queueDelete only takes effect once commit is called.
        """
        doc1 = {'foo': 123, 'bar': 456}
        doc2 = {'foo': 789, 'bar': 101112}
        self.db.queue(doc1)
        self.db.queue(doc2)
        self.db.commit()

        all_docs = self.db.allDocs()
        self.assertEqual(2, len(all_docs['rows']))
        for res in all_docs['rows']:
            doc = self.db.document(res['id'])
            self.db.queueDelete(doc)
        # Deletions are only queued so far: both docs are still there
        all_docs = self.db.allDocs()
        self.assertEqual(2, len(all_docs['rows']))

        self.db.commit()

        all_docs = self.db.allDocs()
        self.assertEqual(0, len(all_docs['rows']))

    def testWriteReadDocNoID(self):
        """
        Placeholder - nothing is exercised here yet.
        """
        # TODO: write and read back a document committed without an id

    def testReplicate(self):
        """
        A replicated document is identical in the source and target database.
        """
        repl_db = self.server.connectDatabase(self.db.name + 'repl')

        doc_id = self.db.commitOne({'foo': 123}, timestamp=True, returndocs=True)[0]['id']

        # replicate
        self.server.replicate(self.db.name, repl_db.name)

        self.assertEqual(self.db.document(doc_id), repl_db.document(doc_id))
        self.server.deleteDatabase(repl_db.name)

    def testSlashInDBName(self):
        """
        Slashes are a valid character in a database name, and are useful as it
        creates a directory structure for the couch data files.
        """
        db_name = 'wmcore/unittests'
        try:
            self.server.deleteDatabase(db_name)
        except Exception:
            # Ignore this - the database shouldn't already exist
            pass

        # The name must survive creation, connection and direct construction
        db = self.server.createDatabase(db_name)
        self.assertEqual(db.info()['db_name'], db_name)

        db = self.server.connectDatabase(db_name)
        self.assertEqual(db.info()['db_name'], db_name)

        db = Database(db_name)
        self.assertEqual(db.info()['db_name'], db_name)

        self.server.deleteDatabase(db_name)

    def testInvalidName(self):
        """
        Capital letters are not allowed in database names.
        """
        db_name = 'Not A Valid Name'
        self.assertRaises(ValueError, self.server.createDatabase, db_name)
        self.assertRaises(ValueError, self.server.deleteDatabase, db_name)
        self.assertRaises(ValueError, self.server.connectDatabase, db_name)
        self.assertRaises(ValueError, Database, db_name)

    def testDocumentSerialisation(self):
        """
        A document should be writable into the couchdb with a timestamp.
        """
        d = Document()
        d['foo'] = 'bar'
        doc_info = self.db.commit(doc=d, timestamp=True)[0]
        d_from_db = self.db.document(doc_info['id'])
        self.assertEqual(d['foo'], d_from_db['foo'])
        self.assertEqual(d['timestamp'], d_from_db['timestamp'])

    def testAttachments(self):
        """
        Test uploading attachments with and without checksumming
        """
        doc = self.db.commitOne({'foo': 'bar'}, timestamp=True, returndocs=True)[0]
        attachment1 = "Hello"
        attachment2 = "How are you today?"
        attachment3 = "I'm very well, thanks for asking"
        attachment4 = "Lovely weather we're having"
        attachment5 = "Goodbye"
        # Base64 encoded MD5 digest of attachment5, passed as its checksum
        attachment5_md5 = base64.b64encode(hashlib.md5(attachment5).digest())
        attachment6 = "Good day to you, sir!"
        # TODO: add a binary attachment - e.g. tar.gz
        doc = self.db.addAttachment(doc['id'], doc['rev'], attachment1)
        doc = self.db.addAttachment(doc['id'], doc['rev'], attachment2, contentType="foo/bar")
        doc = self.db.addAttachment(doc['id'], doc['rev'], attachment3, name="my_greeting")
        doc = self.db.addAttachment(doc['id'], doc['rev'], attachment4, add_checksum=True)
        doc = self.db.addAttachment(doc['id'], doc['rev'], attachment5, checksum=attachment5_md5)

        # A checksum that does not match the payload must be rejected
        self.assertRaises(CouchInternalServerError, self.db.addAttachment, doc['id'],
                          doc['rev'], attachment6, checksum='123')

    def testRevisionHandling(self):
        """
        Create and resolve a replication conflict, then check revision lookups
        before and after compaction.
        """
        # This test won't work from an existing database, conflicts will be preserved, so
        # ruthlessly remove the databases to get a clean slate.
        try:
            self.server.deleteDatabase(self.db.name)
        except CouchNotFoundError:
            pass  # Must have been deleted already

        try:
            self.server.deleteDatabase(self.db.name + 'repl')
        except CouchNotFoundError:
            pass  # Must have been deleted already

        # I'm going to create a conflict, so need a replica db
        self.db = self.server.connectDatabase(self.db.name)
        repl_db = self.server.connectDatabase(self.db.name + 'repl')

        doc_id = self.db.commitOne({'foo': 123}, timestamp=True, returndocs=True)[0]['id']
        doc_v1 = self.db.document(doc_id)

        # replicate
        self.server.replicate(self.db.name, repl_db.name)

        doc_v2 = self.db.document(doc_id)
        doc_v2['bar'] = 456
        self.db.commitOne(doc_v2, returndocs=True)
        doc_v2 = self.db.document(doc_id)

        # now update the replica
        conflict_doc = repl_db.document(doc_id)
        conflict_doc['bar'] = 101112
        repl_db.commitOne(conflict_doc)

        # replicate, creating the conflict
        self.server.replicate(self.db.name, repl_db.name)
        conflict_view = {'map': "function(doc) {if(doc._conflicts) {emit(doc._conflicts, null);}}"}

        data = repl_db.post('/%s/_temp_view' % repl_db.name, conflict_view)

        # Should have one conflict in the repl database
        self.assertEqual(data['total_rows'], 1)
        # Should have no conflicts in the source database
        self.assertEqual(self.db.post('/%s/_temp_view' % self.db.name, conflict_view)['total_rows'], 0)
        conflict_id = data['rows'][0]['id']
        conflict_rev = data['rows'][0]['key'][0]
        self.assertTrue(repl_db.documentExists(conflict_id, rev=conflict_rev))

        # Deleting the conflicting revision resolves the conflict
        repl_db.delete_doc(conflict_id, rev=conflict_rev)

        data = repl_db.post('/%s/_temp_view' % repl_db.name, conflict_view)

        self.assertEqual(data['total_rows'], 0)
        self.server.deleteDatabase(repl_db.name)

        # update it again
        doc_v3 = self.db.document(doc_id)
        doc_v3['baz'] = 789
        self.db.commitOne(doc_v3, returndocs=True)
        doc_v3 = self.db.document(doc_id)

        # test that I can pull out an old revision
        doc_v1_test = self.db.document(doc_id, rev=doc_v1['_rev'])
        self.assertEqual(doc_v1, doc_v1_test)

        # test that I can check a revision exists
        self.assertTrue(self.db.documentExists(doc_id, rev=doc_v2['_rev']))
        self.assertFalse(self.db.documentExists(doc_id, rev='1' + doc_v2['_rev']))

        # why you shouldn't rely on rev
        self.db.compact(blocking=True)
        self.assertFalse(self.db.documentExists(doc_id, rev=doc_v1['_rev']))
        self.assertFalse(self.db.documentExists(doc_id, rev=doc_v2['_rev']))
        self.assertTrue(self.db.documentExists(doc_id, rev=doc_v3['_rev']))

    def testCommit(self):
        """
        Test queue and commit modes
        """
        # try to commit 2 random docs
        doc = {'foo': 123, 'bar': 456}
        self.db.queue(doc)
        self.db.queue(doc)
        self.assertEqual(2, len(self.db.commit()))

        # committing 2 docs with the same id will fail
        self.db.queue(Document(id="1", inputDict={'foo': 123, 'bar': 456}))
        self.db.queue(Document(id="1", inputDict={'foo': 1234, 'bar': 456}))
        answer = self.db.commit()
        self.assertEqual(2, len(answer))
        self.assertEqual(answer[0]['error'], 'conflict')
        self.assertEqual(answer[1]['error'], 'conflict')

        # all_or_nothing mode ignores conflicts
        self.db.queue(Document(id="2", inputDict=doc))
        self.db.queue(Document(id="2", inputDict={'foo': 1234, 'bar': 456}))
        answer = self.db.commit(all_or_nothing=True)
        self.assertEqual(2, len(answer))
        # Neither of the two results may carry an error
        self.assertEqual(answer[0].get('error'), None)
        self.assertEqual(answer[1].get('error'), None)
        self.assertEqual(answer[0]['id'], '2')
        self.assertEqual(answer[1]['id'], '2')

    def testUpdateHandler(self):
        """
        Test that update function support works
        """
        update_ddoc = {
            '_id': '_design/foo',
            'language': 'javascript',
            'updates': {
                "bump-counter": 'function(doc, req) {if (!doc.counter) {doc.counter = 0};doc.counter += 1;return [doc,"bumped it!"];}',
            }
        }
        self.db.commit(update_ddoc)
        doc = {'foo': 123, 'counter': 0}
        doc_id = self.db.commit(doc)[0]['id']
        self.assertEqual("bumped it!", self.db.updateDocument(doc_id, 'foo', 'bump-counter'))
        self.assertEqual(1, self.db.document(doc_id)['counter'])
class WorkQueueBackend(object): """ Represents persistent storage for WorkQueue """ def __init__(self, db_url, db_name='workqueue', inbox_name=None, parentQueue=None, queueUrl=None, logger=None): if logger: self.logger = logger else: import logging self.logger = logging if inbox_name is None: inbox_name = "%s_inbox" % db_name self.server = CouchServer(db_url) self.parentCouchUrlWithAuth = parentQueue if parentQueue: self.parentCouchUrl = sanitizeURL(parentQueue)['url'] else: self.parentCouchUrl = None self.db = self.server.connectDatabase(db_name, create=False, size=10000) self.hostWithAuth = db_url self.inbox = self.server.connectDatabase(inbox_name, create=False, size=10000) self.queueUrl = sanitizeURL(queueUrl or (db_url + '/' + db_name))['url'] self.eleKey = 'WMCore.WorkQueue.DataStructs.WorkQueueElement.WorkQueueElement' def forceQueueSync(self): """Force a blocking replication - used only in tests""" self.pullFromParent(continuous=False) self.sendToParent(continuous=False) def pullFromParent(self, continuous=True, cancel=False): """Replicate from parent couch - blocking: used only int test""" try: if self.parentCouchUrl and self.queueUrl: self.logger.info( "Forcing pullFromParent from parentCouch: %s to queueUrl %s/%s", self.parentCouchUrl, self.queueUrl, self.inbox.name) self.server.replicate(source=self.parentCouchUrl, destination="%s/%s" % (self.hostWithAuth, self.inbox.name), filter='WorkQueue/queueFilter', query_params={ 'childUrl': self.queueUrl, 'parentUrl': self.parentCouchUrl }, continuous=continuous, cancel=cancel) except Exception as ex: self.logger.warning('Replication from %s failed: %s' % (self.parentCouchUrl, str(ex))) def sendToParent(self, continuous=True, cancel=False): """Replicate to parent couch - blocking: used only int test""" try: if self.parentCouchUrl and self.queueUrl: self.logger.info( "Forcing sendToParent from queueUrl %s/%s to parentCouch: %s", self.queueUrl, self.inbox.name, self.parentCouchUrl) 
self.server.replicate(source="%s" % self.inbox.name, destination=self.parentCouchUrlWithAuth, filter='WorkQueue/queueFilter', query_params={ 'childUrl': self.queueUrl, 'parentUrl': self.parentCouchUrl }, continuous=continuous, cancel=cancel) except Exception as ex: self.logger.warning('Replication to %s failed: %s' % (self.parentCouchUrl, str(ex))) def getElementsForSplitting(self): """Returns the elements from the inbox that need to be split, if WorkflowName specified only return elements to split for that workflow""" elements = self.getInboxElements(status='Negotiating') specs = {} # cache as may have multiple elements for same spec for ele in elements: if ele['RequestName'] not in specs: wmspec = WMWorkloadHelper() wmspec.load(self.parentCouchUrlWithAuth + "/%s/spec" % ele['RequestName']) specs[ele['RequestName']] = wmspec ele['WMSpec'] = specs[ele['RequestName']] del specs return elements def insertWMSpec(self, wmspec): """ Insert WMSpec to backend """ # Can't save spec to inbox, it needs to be visible to child queues # Can't save empty dict so add dummy variable dummyValues = {'name': wmspec.name()} # change specUrl in spec before saving (otherwise it points to previous url) wmspec.setSpecUrl(self.db['host'] + "/%s/%s/spec" % (self.db.name, wmspec.name())) return wmspec.saveCouch(self.hostWithAuth, self.db.name, dummyValues) def getWMSpec(self, name): """Get the spec""" wmspec = WMWorkloadHelper() wmspec.load(self.db['host'] + "/%s/%s/spec" % (self.db.name, name)) return wmspec def insertElements(self, units, parent=None): """ Insert element to database @param parent is the parent WorkQueueObject these element's belong to. i.e. 
a workflow which has been split """ if not units: return [] # store spec file separately - assume all elements share same spec self.insertWMSpec(units[0]['WMSpec']) newUnitsInserted = [] for unit in units: # cast to couch if not isinstance(unit, CouchWorkQueueElement): unit = CouchWorkQueueElement(self.db, elementParams=dict(unit)) if parent: unit['ParentQueueId'] = parent.id unit['TeamName'] = parent['TeamName'] unit['WMBSUrl'] = parent['WMBSUrl'] if unit._couch.documentExists(unit.id): self.logger.info( 'Element "%s" already exists, skip insertion.' % unit.id) continue else: newUnitsInserted.append(unit) unit.save() unit._couch.commit(all_or_nothing=True) return newUnitsInserted def createWork(self, spec, **kwargs): """Return the Inbox element for this spec. This does not persist it to the database. """ kwargs.update({ 'WMSpec': spec, 'RequestName': spec.name(), 'StartPolicy': spec.startPolicyParameters(), 'EndPolicy': spec.endPolicyParameters(), 'OpenForNewData': False }) unit = CouchWorkQueueElement(self.inbox, elementParams=kwargs) unit.id = spec.name() return unit def getElements(self, status=None, elementIDs=None, returnIdOnly=False, db=None, loadSpec=False, WorkflowName=None, **elementFilters): """Return elements that match requirements status, elementIDs & filters are 'AND'ed together to filter elements. returnIdOnly causes the element not to be loaded and only the id returned db is used to specify which database to return from loadSpec causes the workflow for each spec to be loaded. 
WorkflowName may be used in the place of RequestName """ key = [] if not db: db = self.db if elementFilters.get('RequestName') and not WorkflowName: WorkflowName = elementFilters.pop('RequestName') if elementIDs: if elementFilters or status or returnIdOnly: msg = "Can't specify extra filters (or return id's) when using element id's with getElements()" raise ValueError(msg) elements = [ CouchWorkQueueElement(db, i).load() for i in elementIDs ] else: options = { 'include_docs': True, 'filter': elementFilters, 'idOnly': returnIdOnly, 'reduce': False } # filter on workflow or status if possible filterName = 'elementsByWorkflow' if WorkflowName: key.append(WorkflowName) elif status: filterName = 'elementsByStatus' key.append(status) elif elementFilters.get('SubscriptionId'): key.append(elementFilters['SubscriptionId']) filterName = 'elementsBySubscription' # add given params to filters if status: options['filter']['Status'] = status if WorkflowName: options['filter']['RequestName'] = WorkflowName view = db.loadList('WorkQueue', 'filter', filterName, options, key) view = json.loads(view) if returnIdOnly: return view elements = [ CouchWorkQueueElement.fromDocument(db, row) for row in view ] if loadSpec: specs = {} # cache as may have multiple elements for same spec for ele in elements: if ele['RequestName'] not in specs: wmspec = self.getWMSpec(ele['RequestName']) specs[ele['RequestName']] = wmspec ele['WMSpec'] = specs[ele['RequestName']] del specs return elements def getInboxElements(self, *args, **kwargs): """ Return elements from Inbox, supports same semantics as getElements() """ return self.getElements(*args, db=self.inbox, **kwargs) def getElementsForWorkflow(self, workflow): """Get elements for a workflow""" elements = self.db.loadView('WorkQueue', 'elementsByWorkflow', { 'key': workflow, 'include_docs': True, 'reduce': False }) return [ CouchWorkQueueElement.fromDocument(self.db, x['doc']) for x in elements.get('rows', []) ] def getElementsForParent(self, 
parent): """Get elements with the given parent""" elements = self.db.loadView('WorkQueue', 'elementsByParent', { 'key': parent.id, 'include_docs': True }) return [ CouchWorkQueueElement.fromDocument(self.db, x['doc']) for x in elements.get('rows', []) ] def saveElements(self, *elements): """Persist elements Returns elements successfully saved, user must verify to catch errors """ result = [] if not elements: return result for element in elements: element.save() answer = elements[0]._couch.commit() result, failures = formatReply(answer, *elements) msg = 'Couch error saving element: "%s", error "%s", reason "%s"' for failed in failures: self.logger.error( msg % (failed['id'], failed['error'], failed['reason'])) return result def _raiseConflictErrorAndLog(self, conflictIDs, updatedParams, dbName="workqueue"): errorMsg = "Need to update this element manually from %s\n ids:%s\n, parameters:%s\n" % ( dbName, conflictIDs, updatedParams) self.logger.error(errorMsg) raise WorkQueueError(errorMsg) def updateElements(self, *elementIds, **updatedParams): """Update given element's (identified by id) with new parameters""" if not elementIds: return eleParams = {} eleParams[self.eleKey] = updatedParams conflictIDs = self.db.updateBulkDocumentsWithConflictHandle( elementIds, eleParams) if conflictIDs: self._raiseConflictErrorAndLog(conflictIDs, updatedParams) return def updateInboxElements(self, *elementIds, **updatedParams): """Update given inbox element's (identified by id) with new parameters""" if not elementIds: return eleParams = {} eleParams[self.eleKey] = updatedParams conflictIDs = self.inbox.updateBulkDocumentsWithConflictHandle( elementIds, eleParams) if conflictIDs: self._raiseConflictErrorAndLog(conflictIDs, updatedParams, "workqueue_inbox") return def deleteElements(self, *elements): """Delete elements""" if not elements: return specs = {} for i in elements: i.delete() specs[i['RequestName']] = None answer = elements[0]._couch.commit() _, failures = 
formatReply(answer, *elements) msg = 'Couch error deleting element: "%s", error "%s", reason "%s"' for failed in failures: # only count delete as failed if document still exists if elements[0]._couch.documentExists(failed['id']): self.logger.error( msg % (failed['id'], failed['error'], failed['reason'])) # delete specs if no longer used for wf in specs: try: if not self.db.loadView('WorkQueue', 'elementsByWorkflow', { 'key': wf, 'limit': 1, 'reduce': False })['rows']: self.db.delete_doc(wf) except CouchNotFoundError: pass def calculateAvailableWork(self, thresholds, siteJobCounts): """ A short version of the `availableWork` method, which is used only to calculate the amount of work already available at the local workqueue. :param thresholds: a dictionary key'ed by the site name, values representing the maximum number of jobs allowed at that site. :param siteJobCounts: a dictionary-of-dictionaries key'ed by the site name; value is a dictionary with the number of jobs running at a given priority. :return: a tuple with the elements accepted and an overview of job counts per site """ # NOTE: this method can be less verbose as well elements = [] # If there are no sites, punt early. if not thresholds: self.logger.error("No thresholds is set: Please check") return elements, siteJobCounts self.logger.info("Calculating available work from queue %s", self.queueUrl) options = {} options['include_docs'] = True options['descending'] = True options['resources'] = thresholds options['num_elem'] = 9999999 # magic number! 
result = self.db.loadList('WorkQueue', 'workRestrictions', 'availableByPriority', options) result = json.loads(result) self.logger.info( "Retrieved %d elements from workRestrictions list for: %s", len(result), self.queueUrl) # Convert python dictionary into Couch WQE objects # And sort them by creation time and priority, such that highest priority and # oldest elements come first in the list sortedElements = [] for item in result: element = CouchWorkQueueElement.fromDocument(self.db, item) sortedElements.append(element) sortAvailableElements(sortedElements) for element in sortedElements: commonSites = possibleSites(element) prio = element['Priority'] # shuffle list of common sites all the time to give everyone the same chance random.shuffle(commonSites) possibleSite = None for site in commonSites: if site in thresholds: # Count the number of jobs currently running of greater priority, if they # are less than the site thresholds, then accept this element curJobCount = sum([ x[1] if x[0] >= prio else 0 for x in viewitems(siteJobCounts.get(site, {})) ]) self.logger.debug("Job Count: %s, site: %s thresholds: %s", curJobCount, site, thresholds[site]) if curJobCount < thresholds[site]: possibleSite = site break if possibleSite: self.logger.debug( "Meant to accept workflow: %s, with prio: %s, element id: %s, for site: %s", element['RequestName'], prio, element.id, possibleSite) elements.append(element) siteJobCounts.setdefault(possibleSite, {}) siteJobCounts[possibleSite][prio] = siteJobCounts[possibleSite].setdefault(prio, 0) + \ element['Jobs'] * element.get('blowupFactor', 1.0) else: self.logger.debug( "No available resources for %s with localdoc id %s", element['RequestName'], element.id) self.logger.info( "And %d elements passed location and siteJobCounts restrictions for: %s", len(elements), self.queueUrl) return elements, siteJobCounts def availableWork(self, thresholds, siteJobCounts, team=None, excludeWorkflows=None, numElems=9999999): """ Get work - either from 
local or global queue - which is available to be run. :param thresholds: a dictionary key'ed by the site name, values representing the maximum number of jobs allowed at that site. :param siteJobCounts: a dictionary-of-dictionaries key'ed by the site name; value is a dictionary with the number of jobs running at a given priority. :param team: a string with the team name we want to pull work for :param excludeWorkflows: list of (aborted) workflows that should not be accepted :param numElems: integer with the maximum number of elements to be accepted (default to a very large number when pulling work from local queue, read unlimited) :return: a tuple with the elements accepted and an overview of job counts per site """ excludeWorkflows = excludeWorkflows or [] elements = [] # If there are no sites, punt early. if not thresholds: self.logger.error("No thresholds is set: Please check") return elements, siteJobCounts self.logger.info("Current siteJobCounts:") for site, jobsByPrio in viewitems(siteJobCounts): self.logger.info(" %s : %s", site, jobsByPrio) self.logger.info("Getting up to %d available work from %s", numElems, self.queueUrl) self.logger.info(" for team name: %s", team) self.logger.info(" with excludeWorkflows: %s", excludeWorkflows) self.logger.info(" for thresholds: %s", thresholds) # FIXME: magic numbers docsSliceSize = 1000 options = {} options['include_docs'] = True options['descending'] = True options['resources'] = thresholds options['limit'] = docsSliceSize # FIXME: num_elem option can likely be deprecated, but it needs synchronization # between agents and global workqueue... for now, make sure it can return the slice size options['num_elem'] = docsSliceSize if team: options['team'] = team # Fetch workqueue elements in slices, using the CouchDB "limit" and "skip" # options for couch views. 
Conditions to stop this loop are: # a) have a hard stop at 50k+1 (we might have to make this configurable) # b) stop as soon as an empty slice is returned by Couch (thus all docs have # already been retrieve) # c) or, once "numElems" elements have been accepted numSkip = 0 breakOut = False while True: if breakOut: # then we have reached the maximum number of elements to be accepted break self.logger.info(" with limit docs: %s, and skip first %s docs", docsSliceSize, numSkip) options['skip'] = numSkip result = self.db.loadList('WorkQueue', 'workRestrictions', 'availableByPriority', options) result = json.loads(result) if result: self.logger.info( "Retrieved %d elements from workRestrictions list for: %s", len(result), self.queueUrl) else: self.logger.info( "All the workqueue elements have been exhausted for: %s ", self.queueUrl) break # update number of documents to skip in the next cycle numSkip += docsSliceSize # Convert python dictionary into Couch WQE objects, skipping aborted workflows # And sort them by creation time and priority, such that highest priority and # oldest elements come first in the list sortedElements = [] for i in result: element = CouchWorkQueueElement.fromDocument(self.db, i) # make sure not to acquire work for aborted or force-completed workflows if element['RequestName'] in excludeWorkflows: msg = "Skipping aborted/force-completed workflow: %s, work id: %s" self.logger.info(msg, element['RequestName'], element._id) else: sortedElements.append(element) sortAvailableElements(sortedElements) for element in sortedElements: if numElems <= 0: msg = "Reached maximum number of elements to be accepted, " msg += "configured to: {}, from queue: {}".format( len(elements), self.queueUrl) self.logger.info(msg) breakOut = True # get out of the outer loop as well break commonSites = possibleSites(element) prio = element['Priority'] # shuffle list of common sites all the time to give everyone the same chance random.shuffle(commonSites) possibleSite = None 
for site in commonSites: if site in thresholds: # Count the number of jobs currently running of greater priority, if they # are less than the site thresholds, then accept this element curJobCount = sum([ x[1] if x[0] >= prio else 0 for x in viewitems(siteJobCounts.get(site, {})) ]) self.logger.debug( "Job Count: %s, site: %s thresholds: %s" % (curJobCount, site, thresholds[site])) if curJobCount < thresholds[site]: possibleSite = site break if possibleSite: self.logger.info( "Accepting workflow: %s, with prio: %s, element id: %s, for site: %s", element['RequestName'], prio, element.id, possibleSite) numElems -= 1 elements.append(element) siteJobCounts.setdefault(possibleSite, {}) siteJobCounts[possibleSite][prio] = siteJobCounts[possibleSite].setdefault(prio, 0) + \ element['Jobs'] * element.get('blowupFactor', 1.0) else: self.logger.debug( "No available resources for %s with doc id %s", element['RequestName'], element.id) self.logger.info( "And %d elements passed location and siteJobCounts restrictions for: %s", len(elements), self.queueUrl) return elements, siteJobCounts def getActiveData(self): """Get data items we have work in the queue for""" data = self.db.loadView('WorkQueue', 'activeData', { 'reduce': True, 'group': True }) return [{ 'dbs_url': x['key'][0], 'name': x['key'][1] } for x in data.get('rows', [])] def getActiveParentData(self): """Get data items we have work in the queue for with parent""" data = self.db.loadView('WorkQueue', 'activeParentData', { 'reduce': True, 'group': True }) return [{ 'dbs_url': x['key'][0], 'name': x['key'][1] } for x in data.get('rows', [])] def getActivePileupData(self): """Get data items we have work in the queue for with pileup""" data = self.db.loadView('WorkQueue', 'activePileupData', { 'reduce': True, 'group': True }) return [{ 'dbs_url': x['key'][0], 'name': x['key'][1] } for x in data.get('rows', [])] def getElementsForData(self, data): """Get active elements for this dbs & data combo""" elements = 
self.db.loadView('WorkQueue', 'elementsByData', { 'key': data, 'include_docs': True }) return [ CouchWorkQueueElement.fromDocument(self.db, x['doc']) for x in elements.get('rows', []) ] def getElementsForParentData(self, data): """Get active elements for this data """ elements = self.db.loadView('WorkQueue', 'elementsByParentData', { 'key': data, 'include_docs': True }) return [ CouchWorkQueueElement.fromDocument(self.db, x['doc']) for x in elements.get('rows', []) ] def getElementsForPileupData(self, data): """Get active elements for this data """ elements = self.db.loadView('WorkQueue', 'elementsByPileupData', { 'key': data, 'include_docs': True }) return [ CouchWorkQueueElement.fromDocument(self.db, x['doc']) for x in elements.get('rows', []) ] def isAvailable(self): """Is the server available, i.e. up and not compacting""" try: compacting = self.db.info()['compact_running'] if compacting: self.logger.info("CouchDB compacting - try again later.") return False except Exception as ex: self.logger.error("CouchDB unavailable: %s" % str(ex)) return False return True def getWorkflows(self, includeInbox=False, includeSpecs=False): """Returns workflows known to workqueue""" result = set([ x['key'] for x in self.db.loadView( 'WorkQueue', 'elementsByWorkflow', {'group': True})['rows'] ]) if includeInbox: result = result | set([ x['key'] for x in self.inbox.loadView( 'WorkQueue', 'elementsByWorkflow', {'group': True})['rows'] ]) if includeSpecs: result = result | set([ x['key'] for x in self.db.loadView('WorkQueue', 'specsByWorkflow')['rows'] ]) return list(result) def queueLength(self): """Return number of available elements""" return self.db.loadView('WorkQueue', 'availableByPriority', {'limit': 0})['total_rows'] def fixConflicts(self): """Fix elements in conflict Each local queue runs this to resolve its conflicts with global, resolution propagates up to global. Conflicting elements are merged into one element with others deleted. 
This will fail if elements are modified during the resolution - if this happens rerun. """ for db in [self.inbox, self.db]: for row in db.loadView('WorkQueue', 'conflicts')['rows']: elementId = row['id'] try: conflicting_elements = [CouchWorkQueueElement.fromDocument(db, db.document(elementId, rev)) \ for rev in row['value']] fixed_elements = fixElementConflicts(*conflicting_elements) if self.saveElements(fixed_elements[0]): self.saveElements( *fixed_elements[1:] ) # delete others (if merged value update accepted) except Exception as ex: self.logger.error("Error resolving conflict for %s: %s" % (elementId, str(ex))) def recordTaskActivity(self, taskname, comment=''): """Record a task for monitoring""" try: record = self.db.document('task_activity') except CouchNotFoundError: record = Document('task_activity') record.setdefault('tasks', {}) record['tasks'].setdefault(taskname, {}) record['tasks'][taskname]['timestamp'] = time.time() record['tasks'][taskname]['comment'] = comment try: self.db.commitOne(record) except Exception as ex: self.logger.error("Unable to update task %s freshness: %s" % (taskname, str(ex))) def getWMBSInjectStatus(self, request=None): """ This service only provided by global queue except on draining agent """ options = {'group': True, 'reduce': True} if request: options.update(key=request) data = self.db.loadView('WorkQueue', 'wmbsInjectStatusByRequest', options) if request: if data['rows']: injectionStatus = data['rows'][0]['value'] inboxElement = self.getInboxElements(WorkflowName=request) requestOpen = inboxElement[0].get( 'OpenForNewData', False) if inboxElement else False return injectionStatus and not requestOpen else: raise WorkQueueNoMatchingElements("%s not found" % request) else: injectionStatus = dict( (x['key'], x['value']) for x in data.get('rows', [])) finalInjectionStatus = [] for request in injectionStatus: inboxElement = self.getInboxElements(WorkflowName=request) requestOpen = inboxElement[0].get( 'OpenForNewData', False) if 
inboxElement else False finalInjectionStatus.append( {request: injectionStatus[request] and not requestOpen}) return finalInjectionStatus def getWorkflowNames(self, inboxFlag=False): """Get workflow names from workqueue db""" if inboxFlag: db = self.inbox else: db = self.db data = db.loadView('WorkQueue', 'elementsByWorkflow', { 'stale': "update_after", 'reduce': True, 'group': True }) return [x['key'] for x in data.get('rows', [])] def deleteWQElementsByWorkflow(self, workflowNames): """ delete workqueue elements belongs to given workflow names it doen't check the status of workflow so need to be careful to use this. Pass only workflows which has the end status """ deleted = 0 dbs = [self.db, self.inbox] if not isinstance(workflowNames, list): workflowNames = [workflowNames] if len(workflowNames) == 0: return deleted options = {} options["stale"] = "update_after" options["reduce"] = False idsByWflow = {} for couchdb in dbs: result = couchdb.loadView("WorkQueue", "elementsByWorkflow", options, workflowNames) for entry in result["rows"]: idsByWflow.setdefault(entry['key'], []) idsByWflow[entry['key']].append(entry['id']) for wflow, docIds in viewitems(idsByWflow): self.logger.info( "Going to delete %d documents in *%s* db for workflow: %s. Doc IDs: %s", len(docIds), couchdb.name, wflow, docIds) try: couchdb.bulkDeleteByIDs(docIds) except CouchNotFoundError as exc: self.logger.error( "Failed to find one of the documents. Error: %s", str(exc)) deleted += len(docIds) # delete the workflow with spec from workqueue db for wf in workflowNames: self.db.delete_doc(wf) return deleted
destDbBase = sys.argv[2]

# Server handles for the destination and source CouchDB hosts
destCouchServer = CouchServer(dburl=destCouchHost)
srcCouchServer = CouchServer(dburl=srcCouchHost)

# Full database URLs; "%2F" is the URL-encoded "/" inside the database name
srcJobsDb = "/".join((srcCouchHost, srcDbBase + "%2Fjobs"))
destJobsDb = "/".join((destCouchHost, destDbBase + "%2Fjobs"))
srcFwjrsDb = "/".join((srcCouchHost, srcDbBase + "%2Ffwjrs"))
destFwjrsDb = "/".join((destCouchHost, destDbBase + "%2Ffwjrs"))

print("Archiving %s/%s to %s/%s..." %
      (srcCouchHost, srcDbBase, destCouchHost, destDbBase))

# Replicate the FWJR and Jobs databases...
print(" Replicating jobs database...")
destCouchServer.replicate(srcJobsDb, destJobsDb, create_target=True)
print(" Replication fwjrs database...")
destCouchServer.replicate(srcFwjrsDb, destFwjrsDb, create_target=True)

# Generate views for the various databases
# (from here on destJobsDb / destFwjrsDb are database handles, not URL strings)
destJobsDb = destCouchServer.connectDatabase(destDbBase + "/jobs")
destFwjrsDb = destCouchServer.connectDatabase(destDbBase + "/fwjrs")
print(" Triggering view generation for jobs database...")
destJobsDb.loadView("JobDump", "statusByWorkflowName", options={"limit": 1})
print(" Triggering view generation for fwjrs database...")
destFwjrsDb.loadView("FWJRDump", "outputByWorkflowName", options={"limit": 1})
print("")

# Query destination DB for list of workflows
# URL templates, filled in with: dest host, dest db base, workflow name
summaryBase = "%s/%s%%2Ffwjrs/_design/FWJRDump/_show/workflowSummary/%s"
# URL template, filled in with: dest host, dest db base, workflow, workflow
successBase = "%s/%s%%2Fjobs/_design/JobDump/_list/successJobs/statusByWorkflowName?startkey=%%5B%%22%s%%22%%5D&endkey=%%5B%%22%s%%22%%2C%%7B%%7D%%5D&reduce=false"
class WMStatsWriter(WMStatsReader):
    """Write-side companion of WMStatsReader for a WMStats CouchDB database.

    Adds document upload, calls to the ``WMStats`` design-document update
    handlers, deletion of old documents and replication management (through
    the server's ``_replicator`` database).
    """

    def __init__(self, couchURL, dbName=None):
        """Connect to the WMStats database and to ``_replicator``.

        :param couchURL: server URL when ``dbName`` is given, otherwise a URL
                         that splitCouchServiceURL splits into server and
                         database name
        :param dbName: optional database name
        """
        # NOTE(review): unlike WMStatsReader.__init__ the URL is not passed
        # through sanitizeURL and the parent constructor is not called --
        # presumably so credentials in the URL are kept for writing; confirm.
        # set the connection for local couchDB call
        if dbName:
            self.couchURL = couchURL
            self.dbName = dbName
        else:
            self.couchURL, self.dbName = splitCouchServiceURL(couchURL)
        self.couchServer = CouchServer(self.couchURL)
        self.couchDB = self.couchServer.connectDatabase(self.dbName, False)
        self.replicatorDB = self.couchServer.connectDatabase('_replicator', False)

    def uploadData(self, docs):
        """Queue the given document(s) and commit them in one call.

        :param docs: a single document (a dict or dict subclass) or an
                     iterable of documents
        :return: result of ``commit(returndocs=True)`` on the database
        """
        # add delete docs as well for the compaction
        # need to check whether delete and update is successful
        # isinstance (not type ==) so that dict subclasses are treated as one
        # document instead of being iterated key by key
        if isinstance(docs, dict):
            docs = [docs]
        for doc in docs:
            self.couchDB.queue(doc)
        return self.couchDB.commit(returndocs=True)

    def insertRequest(self, schema):
        """Build a monitoring document from a request schema and insert it."""
        doc = monitorDocFromRequestSchema(schema)
        return self.insertGenericRequest(doc)

    def insertGenericRequest(self, doc):
        """Insert ``doc`` via the ``insertRequest`` handler and mark it "new".

        :param doc: request document; must contain ``_id``
        :return: result of the ``insertRequest`` update call
        """
        result = self.couchDB.updateDocument(
            doc['_id'], 'WMStats', 'insertRequest',
            fields={'doc': JSONEncoder().encode(doc)})
        self.updateRequestStatus(doc['_id'], "new")
        return result

    def updateRequestStatus(self, request, status):
        """Record ``status`` with the current time (epoch seconds) for ``request``."""
        statusTime = {'status': status, 'update_time': int(time.time())}
        return self.couchDB.updateDocument(
            request, 'WMStats', 'requestStatus',
            fields={'request_status': JSONEncoder().encode(statusTime)})

    def updateTeam(self, request, team):
        """Set the team of ``request`` through the ``team`` update handler."""
        return self.couchDB.updateDocument(request, 'WMStats', 'team',
                                           fields={'team': team})

    def insertTotalStats(self, request, totalStats):
        """
        update the total stats of given workflow
        (total_jobs, input_events, input_lumis, input_num_files)
        """
        return self.couchDB.updateDocument(request, 'WMStats', 'totalStats',
                                           fields=totalStats)

    def updateFromWMSpec(self, spec):
        """Update priority, site white list and output datasets from a spec.

        :param spec: workload object providing ``priority()``,
                     ``getTopLevelTask()``, ``listOutputDatasets()`` and
                     ``name()``
        :return: result of the ``generalFields`` update call
        """
        # currently only update priority and siteWhitelist and output dataset
        # complex field needs to be JSON encoded
        # assuming all the toplevel tasks has the same site white lists
        # priority is priority + user priority + group priority
        fields = {
            'priority': spec.priority(),
            'site_white_list': spec.getTopLevelTask()[0].siteWhitelist(),
            'outputdatasets': spec.listOutputDatasets(),
        }
        return self.couchDB.updateDocument(
            spec.name(), 'WMStats', 'generalFields',
            fields={'general_fields': JSONEncoder().encode(fields)})

    def updateRequestsInfo(self, docs):
        """
        bulk update for request documents.
        TODO: change to bulk update handler when it gets supported

        Every document is sent without its ``type`` field; the caller's
        documents are left unmodified.

        :param docs: iterable of dicts, each containing ``workflow``
        """
        for doc in docs:
            # encode a filtered copy instead of deleting 'type' in place
            generalFields = {key: value for key, value in doc.items()
                             if key != 'type'}
            self.couchDB.updateDocument(
                doc['workflow'], 'WMStats', 'generalFields',
                fields={'general_fields': JSONEncoder().encode(generalFields)})

    def updateAgentInfo(self, agentInfo):
        """Store ``agentInfo`` (must contain ``_id``) via the ``agentInfo`` handler."""
        return self.couchDB.updateDocument(
            agentInfo['_id'], 'WMStats', 'agentInfo',
            fields={'agent_info': JSONEncoder().encode(agentInfo)})

    def deleteOldDocs(self, days):
        """
        delete the documents from wmstats db older than param 'days'

        :param days: age threshold in days (may be fractional)
        :return: ``{'delete': <count>, 'error': {<error>: <count>}}`` built
                 from the commit result, or the string ``"nothing"`` when the
                 commit returned nothing
        """
        sec = int(days * 24 * 60 * 60)
        threshold = int(time.time()) - sec
        options = {
            "startkey": threshold,
            "descending": True,
            "stale": "update_after",
        }
        result = self.couchDB.loadView("WMStats", "time", options)

        for row in result['rows']:
            self.couchDB.queueDelete({'_id': row['value']['id'],
                                      '_rev': row['value']['rev']})
        committed = self.couchDB.commit()

        if not committed:
            return "nothing"

        errorReport = {}
        deleted = 0
        for data in committed:
            # 'in' instead of dict.has_key(), which does not exist on Python 3
            if 'error' in data:
                errorReport[data['error']] = errorReport.get(data['error'], 0) + 1
            else:
                deleted += 1
        return {'delete': deleted, 'error': errorReport}

    def replicate(self, target):
        """Start continuous, filtered replication of this database to ``target``.

        Uses the ``WMStats/repfilter`` filter and ``useReplicator=True``.
        """
        return self.couchServer.replicate(self.dbName, target,
                                          continuous=True,
                                          filter='WMStats/repfilter',
                                          useReplicator=True)

    def getDBInstance(self):
        """Return the database connection object."""
        return self.couchDB

    def getServerInstance(self):
        """Return the server connection object."""
        return self.couchServer

    def getActiveTasks(self):
        """Return the ``active_tasks`` entry of the server status."""
        couchStatus = self.couchServer.status()
        return couchStatus['active_tasks']

    def deleteReplicatorDocs(self):
        """Delete every ``_replicator`` document whose id does not start with ``_``."""
        repDocs = self.replicatorDB.allDocs()['rows']
        for row in repDocs:
            # ids starting with '_' (e.g. design documents) are kept
            if not row['id'].startswith('_'):
                self.replicatorDB.queueDelete({"_id": row['id'],
                                               "_rev": row['value']['rev']})
        self.replicatorDB.commit()