from WMCore.ACDC.CouchService import CouchService
from WMCore.Lexicon import splitCouchServiceURL
from WMCore.REST.CherryPyPeriodicTask import CherryPyPeriodicTask
from WMCore.Services.DBS.DBSReader import DBS3Reader
from WMCore.Services.ReqMgrAux.ReqMgrAux import ReqMgrAux
from WMCore.Services.RequestDB.RequestDBReader import RequestDBReader


class CouchDBCleanup(CherryPyPeriodicTask):

    def __init__(self, rest, config):
        super(CouchDBCleanup, self).__init__(config)
        self.reqDB = RequestDBReader(config.reqmgrdb_url)
        self.reqmgrAux = ReqMgrAux(config.reqmgr2_url, logger=self.logger)
        # statuses for which we want to keep the transfer documents
        self.transferStatuses = ["assigned", "staging", "staged", "acquired",
                                 "failed", "running-open", "running-closed"]
        baseURL, acdcDB = splitCouchServiceURL(config.acdc_url)
        self.acdcService = CouchService(url=baseURL, database=acdcDB)

    def setConcurrentTasks(self, config):
        """
        Sets the list of functions to be run concurrently, each with its
        own execution interval.
        """
        self.concurrentTasks = [{'func': self.acdcCleanup, 'duration': config.acdcCleanDuration},
                                {'func': self.auxCouchCleanup, 'duration': config.auxCleanDuration}]

    def auxCouchCleanup(self, config):
        """
        Clean up TRANSFER documents from the reqmgr_auxiliary CouchDB.
        The list of statuses can be expanded in the future.
        """
        self.logger.info("Fetching TRANSFER documents from CouchDB...")
        transferDocs = self.reqmgrAux.getTransferInfo("ALL_DOCS")
        if not transferDocs:
            self.logger.info("There are no transfer documents in the database.")
            return

        auxDocs = []
        for row in transferDocs:
            auxDocs.append(row['workflowName'])

        results = self.reqDB._getCouchView("bystatus", {}, self.transferStatuses)
        activeRequests = []
        for row in results["rows"]:
            activeRequests.append(row["id"])

        # now find transfer docs that are not active in the system
        transferDocs = []
        for transferDoc in auxDocs:
            if transferDoc not in activeRequests:
                transferDocs.append(transferDoc)
        self.logger.info("Found %d transfer documents to delete", len(transferDocs))

        for wflowName in transferDocs:
            self.logger.info("Deleting transfer document: %s", wflowName)
            try:
                self.reqmgrAux.deleteConfigDoc("transferinfo", wflowName)
            except Exception as exc:
                self.logger.warning("Failed to delete transfer doc: %s. Error: %s",
                                    wflowName, str(exc))
        self.logger.info("Transfer documents cleanup completed.")

    def acdcCleanup(self, config):
        """
        Delete ACDC collections for requests that have already reached a
        final status.
        """
        self.logger.info("Fetching ACDC collection names...")
        originalRequests = self.acdcService.listCollectionNames()
        if not originalRequests:
            self.logger.info("There are no collection documents to delete.")
            return

        # filter requests
        results = self.reqDB._getCouchView("byrequest", {}, originalRequests)
        # keep only requests in one of the following statuses
        deleteStates = ["announced", "rejected-archived", "aborted-archived",
                        "normal-archived"]
        filteredRequests = []
        for row in results["rows"]:
            if row["value"][0] in deleteStates:
                filteredRequests.append(row["key"])

        total = 0
        for req in filteredRequests:
            try:
                self.logger.info("Removing ACDC collection for: %s", req)
                deleted = self.acdcService.removeFilesetsByCollectionName(req)
                if deleted is None:
                    self.logger.warning("Request '%s' was already deleted", req)
                else:
                    total += len(deleted)
                    self.logger.info("Request %s deleted", req)
            except Exception as ex:
                self.logger.error("Failed to delete request: %s, will try again later. Error: %s",
                                  req, str(ex))
        self.logger.info("Total of %s requests deleted", total)
        return
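# A minimal usage sketch for CouchDBCleanup, assuming the WMCore ConfigSection
# API. The URLs and intervals below are illustrative placeholders, not the
# production ReqMgr2 deployment values, and a real deployment may require
# additional CherryPyPeriodicTask settings (e.g. logging):
#
#     from WMCore.Configuration import ConfigSection
#
#     config = ConfigSection("couchDBCleanup")
#     config.reqmgrdb_url = "https://cmsweb.cern.ch/couchdb/reqmgr_workload_cache"
#     config.reqmgr2_url = "https://cmsweb.cern.ch/reqmgr2"
#     config.acdc_url = "https://cmsweb.cern.ch/couchdb/acdcserver"
#     config.acdcCleanDuration = 60 * 60   # run acdcCleanup every hour
#     config.auxCleanDuration = 60 * 60    # run auxCouchCleanup every hour
#
#     task = CouchDBCleanup(rest=None, config=config)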
class BuildParentLock(CherryPyPeriodicTask):

    def __init__(self, rest, config):
        super(BuildParentLock, self).__init__(config)
        self.reqmgrAux = ReqMgrAux(config.reqmgr2_url, logger=self.logger)
        self.dbs = DBS3Reader(config.dbs_url)
        # cache of DBS lookups mapping input dataset to parent dataset
        self.dbsLookupCache = {}
        # set of currently active datasets requiring a parent dataset
        self.inputDatasetCache = set()
        self.reqDB = RequestDBReader(config.reqmgrdb_url)
        self.filterKeys = ['assignment-approved', 'assigned', 'staging',
                           'staged', 'failed', 'acquired', 'running-open',
                           'running-closed', 'force-complete', 'completed',
                           'closed-out']

    def setConcurrentTasks(self, config):
        """
        Sets the list of functions to be run concurrently, each with its
        own execution interval.
        """
        self.concurrentTasks = [{'func': self.fetchIncludeParentsRequests,
                                 'duration': config.updateParentsInterval}]

    def fetchIncludeParentsRequests(self, config):
        """
        Fetch active requests from the "requestsincludeparents" couch view
        that have IncludeParents=True, find the parents of each dataset and
        send them to the reqmgr2 auxiliary database.
        """
        # use this boolean to signal whether there were datasets that failed
        # to get their parentage resolved
        incompleteParentage = False
        # use this boolean to signal if new parent datasets need to be locked
        auxDbUpdateRequired = False

        setDsets = set()
        setParents = set()
        dictParents = {}

        self.logger.info("Executing parent lock cherrypy thread")

        # query couch view to find datasets for workflows requiring parent
        # datasets, only returning requests with the statuses in filterKeys
        try:
            results = self.reqDB._getCouchView("requestsincludeparents", {}, self.filterKeys)
        except Exception as ex:
            self.logger.error("Error retrieving requests including parent datasets from couchdb.")
            self.logger.error("Error: %s", str(ex))
            return

        for row in results["rows"]:
            dataset = row["value"]
            setDsets.add(dataset)

        # check to see if any changes have been made
        if setDsets != self.inputDatasetCache:
            auxDbUpdateRequired = True
            self.inputDatasetCache = setDsets.copy()

        self.logger.info("Found %d unique datasets requiring the parent dataset",
                         len(setDsets))
        if auxDbUpdateRequired:
            self.logger.info("Found new parent dataset locks to update.")
            # look up parent datasets first via the local DBS cache;
            # if not found, do the lookup via DBS
            for dset in setDsets:
                if dset in self.dbsLookupCache:
                    setParents.add(self.dbsLookupCache[dset])
                    self.logger.info("Resolved parentage via lookup cache for: %s", dset)
                else:
                    try:
                        res = self.dbs.listDatasetParents(dset)
                    except Exception as exc:
                        self.logger.warning("Failed to resolve parentage for: %s. Error: %s",
                                            dset, str(exc))
                        incompleteParentage = True
                        continue
                    self.logger.info("Resolved parentage via DBS for: %s", res)
                    if res:
                        setParents.add(res[0]['parent_dataset'])
                        self.dbsLookupCache[dset] = res[0]['parent_dataset']

            if not incompleteParentage:
                dictParents['parentlocks'] = list(setParents)
                if self.reqmgrAux.updateParentLocks(dictParents):
                    self.logger.info("Parentage lookup complete and auxiliary database updated.")
                else:
                    self.logger.info("Error updating parentage document. Using stale data until next cycle.")
            else:
                # then don't replace any data for the moment, simply add new parents
                previousData = self.reqmgrAux.getParentLocks()
                # check to see if the response from the aux db has been populated
                if previousData and 'parentlocks' in previousData[0]:
                    setPreviousData = set(previousData[0]['parentlocks'])
                    setParents = setParents | setPreviousData
                    dictParents['parentlocks'] = list(setParents)
                    self.reqmgrAux.updateParentLocks(dictParents)
                    self.logger.info("Parentage lookup complete (with errors) and auxiliary database updated.")
                else:
                    self.logger.info("Parent locks not returned from auxiliary database. Skipping parentage update.")
        else:
            self.logger.info("No new parent datasets need to be locked. Skipping update of auxiliary database.")

        return
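# A similar hedged configuration sketch for BuildParentLock; the dbs_url value
# and the update interval are illustrative placeholders:
#
#     config = ConfigSection("buildParentLock")
#     config.reqmgr2_url = "https://cmsweb.cern.ch/reqmgr2"
#     config.reqmgrdb_url = "https://cmsweb.cern.ch/couchdb/reqmgr_workload_cache"
#     config.dbs_url = "https://cmsweb.cern.ch/dbs/prod/global/DBSReader"
#     config.updateParentsInterval = 6 * 60 * 60   # resolve parentage every 6 hours
#
#     task = BuildParentLock(rest=None, config=config)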