def __init__(self, config):
    """
    Initialise logging, the config CouchDB handle, PhEDEx and the log directory.

    :param config: global configuration; its ``FilesCleaner`` section is
        stored as ``self.config``.
    :raises OSError: if the log directory cannot be created for a reason
        other than it already existing.
    """
    BaseWorkerThread.__init__(self)
    self.config = config.FilesCleaner
    self.logger.debug('Configuration loaded')
    try:
        self.logger.setLevel(self.config.log_level)
    except Exception:
        # The inherited logger could not be configured: use the root logger.
        import logging
        self.logger = logging.getLogger()
        self.logger.setLevel(self.config.log_level)
    self.logger.debug('Configuration loaded')

    config_server = CouchServer(dburl=self.config.config_couch_instance)
    self.config_db = config_server.connectDatabase(self.config.config_database)
    self.logger.debug('Connected to files DB')
    self.phedex = PhEDEx(responseType='xml')

    # Read the clock once so month and year come from the same instant.
    now = datetime.datetime.now()
    self.log_dir = '%s/logs/%s/%s/%s' % (
        self.config.componentDir, str(now.month), str(now.year), "Ops")
    try:
        os.makedirs(self.log_dir)
    except OSError as e:
        # An already existing directory is fine; anything else is fatal.
        if e.errno != errno.EEXIST:
            # The message needs a placeholder: without one the % operator
            # raises TypeError and hides the original OSError.
            self.logger.error('Unknown error in mkdir: %s' % e.errno)
            raise
def __init__(self, config):
    """
    Bind the instance to an existing CouchDB database named by ``config.uri``.

    The URI looks like ``http://servername:port/databaseName``: the text after
    the last ``/`` is the database name, the text before it the server URL.

    :param config: configuration object providing the ``uri`` attribute.
    :raises Exception: if the database is not listed by the server.
    """
    self.config = config

    # Only a CouchDB server is supported as REST endpoint at the moment.
    lastSlash = self.config.uri.rfind('/')
    serverUrl = self.config.uri[:lastSlash]
    databaseName = self.config.uri[lastSlash + 1:]

    # As opposed to CouchSink, the database has to exist already.
    if databaseName not in CouchServer(serverUrl).listDatabases():
        raise Exception("%s: REST URI: %s, %s does not exist." %
                        (self.__class__.__name__, self.config.uri, databaseName))

    self._database = Database(databaseName, serverUrl)
    logging.debug("%s initialized." % self.__class__.__name__)
class CouchAppTestHarness:
    """
    Test Harness for installing a couch database instance with several
    couchapps in a unittest.setUp and wiping it out in a unittest.tearDown
    """

    def __init__(self, dbName, couchUrl=None):
        """
        :param dbName: name of the couch database to manage.
        :param couchUrl: server URL used only when the ``COUCHURL``
            environment variable is not set.
        :raises RuntimeError: if no URL is available from either source.
        """
        self.couchUrl = os.environ.get("COUCHURL", couchUrl)
        self.dbName = dbName
        if self.couchUrl is None:
            raise RuntimeError("COUCHURL env var not set...")
        self.couchServer = CouchServer(self.couchUrl)
        self.couchappConfig = Config()

    def create(self):
        """create couch db instance, dropping an existing one first"""
        if self.dbName in self.couchServer.listDatabases():
            self.drop()
        self.couchServer.createDatabase(self.dbName)

    def drop(self):
        """blow away the couch db instance"""
        self.couchServer.deleteDatabase(self.dbName)

    def pushCouchapps(self, *couchappdirs):
        """
        push a list of couchapps to the database

        :param couchappdirs: directories of the couchapps to push.
        """
        # quote_plus lives in different modules on Python 2 and Python 3.
        try:
            from urllib import quote_plus
        except ImportError:
            from urllib.parse import quote_plus

        target = "%s/%s" % (self.couchUrl, quote_plus(self.dbName))
        for couchappdir in couchappdirs:
            couchapppush(self.couchappConfig, couchappdir, target)
def testB_testErrors(self):
    """
    _testErrors_

    Test with a failed FWJR: run the TaskArchiver poller and check the
    error, lumi and histogram content of the resulting workload summary.
    """
    config = self.getConfig()
    workloadPath = os.path.join(self.testDir, 'specDir', 'spec.pkl')
    workload = self.createWorkload(workloadName=workloadPath)
    # The returned job group is not inspected by this test.
    self.createTestJobGroup(config=config,
                            name=workload.name(),
                            specLocation=workloadPath,
                            error=True)

    cachePath = os.path.join(config.JobCreator.jobCacheDir,
                             "TestWorkload", "ReReco")
    os.makedirs(cachePath)
    self.assertTrue(os.path.exists(cachePath))

    couchdb = CouchServer(config.JobStateMachine.couchurl)
    jobdb = couchdb.connectDatabase("%s/jobs" % self.databaseName)
    fwjrdb = couchdb.connectDatabase("%s/fwjrs" % self.databaseName)
    # The view results are discarded.
    # NOTE(review): presumably queried only to build the views - confirm.
    jobdb.loadView("JobDump", "jobsByWorkflowName",
                   options={"startkey": [workload.name()],
                            "endkey": [workload.name(), {}]})['rows']
    fwjrdb.loadView("FWJRDump", "fwjrsByWorkflowName",
                    options={"startkey": [workload.name()],
                             "endkey": [workload.name(), {}]})['rows']

    testTaskArchiver = TaskArchiverPoller(config=config)
    testTaskArchiver.algorithm()

    dbname = getattr(config.JobStateMachine, "couchDBName")
    workdatabase = couchdb.connectDatabase("%s/workloadsummary" % dbname)
    workloadSummary = workdatabase.document(id=workload.name())

    taskErrors = workloadSummary['errors']['/TestWorkload/ReReco']
    self.assertEqual(taskErrors['failureTime'], 500)
    self.assertTrue('99999' in taskErrors['cmsRun1'])

    # Remove duplicated lumis before comparing.
    failedRunInfo = taskErrors['cmsRun1']['99999']['runs']
    for key in list(failedRunInfo):
        failedRunInfo[key] = list(set(failedRunInfo[key]))
    self.assertEqual(failedRunInfo, {'10': [12312]},
                     "Wrong lumi information in the summary for failed jobs")

    # Check the failures by site histograms
    histograms = workloadSummary['histograms']
    workflowFailures = histograms['workflowLevel']['failuresBySite']
    stepErrors = histograms['stepLevel']['/TestWorkload/ReReco']['cmsRun1']['errorsBySite']

    self.assertEqual(workflowFailures['data']['T1_IT_CNAF']['Failed Jobs'], 10)
    self.assertEqual(stepErrors['data']['T1_IT_CNAF']['99999'], 10)
    self.assertEqual(stepErrors['data']['T1_IT_CNAF']['8020'], 10)
    self.assertEqual(workflowFailures['average']['Failed Jobs'], 10)
    self.assertEqual(stepErrors['average']['99999'], 10)
    self.assertEqual(stepErrors['average']['8020'], 10)
    self.assertEqual(workflowFailures['stdDev']['Failed Jobs'], 0)
    self.assertEqual(stepErrors['stdDev']['99999'], 0)
    self.assertEqual(stepErrors['stdDev']['8020'], 0)
    return
def __init__(self, config):
    """
    Initialise class members: open the storage backend selected by
    ``self.config.isOracle`` and load the retry algorithm plugin.

    :param config: configuration handed to ``BaseDaemon.__init__``.
    :raises RetryManagerException: if the plugin cannot be loaded.
    """
    BaseDaemon.__init__(self, config, 'RetryManager')

    if not self.config.isOracle:
        try:
            couch = CouchServer(dburl=self.config.couch_instance,
                                ckey=self.config.opsProxy,
                                cert=self.config.opsProxy)
            self.db = couch.connectDatabase(self.config.files_database)
        except Exception as e:
            self.logger.exception('A problem occured when connecting to couchDB: %s' % e)
            raise
        self.logger.debug('Connected to files DB')
    else:
        self.oracleDB = HTTPRequests(self.config.oracleDB,
                                     self.config.opsProxy,
                                     self.config.opsProxy)

    # Set up a factory for loading plugins
    self.factory = WMFactory(self.config.retryAlgoDir,
                             namespace=self.config.retryAlgoDir)
    try:
        self.plugin = self.factory.loadObject(self.config.algoName,
                                              self.config,
                                              getFromCache=False,
                                              listFlag=True)
    except Exception as ex:
        msg = "Error loading plugin %s on path %s\n" % (self.config.algoName,
                                                        self.config.retryAlgoDir)
        msg = msg + str(ex)
        self.logger.error(msg)
        raise RetryManagerException(msg)

    self.cooloffTime = self.config.cooloffTime
class ChangeState(WMObject, WMConnectionBase):
    """
    Propagate the state of a job through the JSM.
    """

    def __init__(self, config, couchDbName=None):
        """
        Connect to the jobs/fwjrs couch databases and create the DAOs.

        :param config: configuration passed to ``WMObject.__init__``;
            ``self.config.JobStateMachine`` is read for ``couchurl``,
            ``couchDBName`` and the optional ``maxFWJRInputFiles``.
        :param couchDbName: database name prefix; when None the configured
            ``couchDBName`` is used.
        """
        WMObject.__init__(self, config)
        WMConnectionBase.__init__(self, "WMCore.WMBS")

        if couchDbName is None:
            self.dbname = getattr(self.config.JobStateMachine, "couchDBName")
        else:
            self.dbname = couchDbName

        try:
            self.couchdb = CouchServer(self.config.JobStateMachine.couchurl)
            self.jobsdatabase = self.couchdb.connectDatabase("%s/jobs" % self.dbname)
            self.fwjrdatabase = self.couchdb.connectDatabase("%s/fwjrs" % self.dbname)
        except Exception as ex:
            # A couch failure is only logged; both handles are left as None.
            logging.error("Error connecting to couch: %s" % str(ex))
            self.jobsdatabase = None
            self.fwjrdatabase = None

        self.getCouchDAO = self.daofactory("Jobs.GetCouchID")
        self.setCouchDAO = self.daofactory("Jobs.SetCouchID")
        self.incrementRetryDAO = self.daofactory("Jobs.IncrementRetry")
        self.workflowTaskDAO = self.daofactory("Jobs.GetWorkflowTask")

        self.maxUploadedInputFiles = getattr(self.config.JobStateMachine,
                                             'maxFWJRInputFiles', 1000)
        return
def atestB_testErrors(self):
    """
    _testErrors_

    Test with a failed FWJR: run the TaskArchiver poller and check the
    failure information stored in the "TestWorkload" summary document.
    """
    config = self.getConfig()
    workloadPath = os.path.join(self.testDir, "specDir", "spec.pkl")
    workload = self.createWorkload(workloadName=workloadPath)
    # The returned job group is not inspected by this test.
    self.createTestJobGroup(
        config=config, name=workload.name(), specLocation=workloadPath, error=True
    )

    cachePath = os.path.join(config.JobCreator.jobCacheDir, "TestWorkload", "ReReco")
    os.makedirs(cachePath)
    self.assertTrue(os.path.exists(cachePath))

    testTaskArchiver = TaskArchiverPoller(config=config)
    testTaskArchiver.algorithm()

    dbname = getattr(config.JobStateMachine, "couchDBName")
    couchdb = CouchServer(config.JobStateMachine.couchurl)
    workdatabase = couchdb.connectDatabase(dbname)
    workloadSummary = workdatabase.document(id="TestWorkload")

    taskSummary = workloadSummary["/TestWorkload/ReReco"]
    self.assertEqual(taskSummary["failureTime"], 500)
    # dict.has_key() does not exist on Python 3; "in" works everywhere.
    self.assertTrue("99999" in taskSummary["cmsRun1"])
    return
class Algo:
    """
    Plugins parent class.
    """

    def __init__(self, config, logger, users, pool_size):
        """
        Store the arguments and open the files and config databases on the
        server named by ``config.couch_instance``.
        """
        self.config = config
        self.logger = logger
        self.asyncServer = CouchServer(self.config.couch_instance)
        connect = self.asyncServer.connectDatabase
        self.db = connect(self.config.files_database)
        self.config_db = connect(self.config.config_database)
        self.users = users
        self.pool_size = pool_size

    def __call__(self):
        """
        Meant to be overridden by subclasses so that useful results are
        returned; this base version returns an empty list.
        """
        return list()

    def updateSource(self, inputDict):
        """
        Meant to be overridden by subclasses to make a specific update in
        the source; this base version ignores ``inputDict`` and returns an
        empty list.
        """
        return list()
class ChangeState(WMObject, WMConnectionBase):
    """
    Propagate the state of a job through the JSM.
    """

    def __init__(self, config, couchDbName=None):
        """
        Connect to the jobs/fwjrs couch databases and set up the dashboard
        reporter.  Failures of either step are logged, not raised.

        :param config: configuration passed to ``WMObject.__init__`` and to
            ``DashboardReporter``; ``self.config.JobStateMachine`` is read
            for ``couchurl`` and ``couchDBName``.
        :param couchDbName: database name prefix; when None the configured
            ``couchDBName`` is used.
        """
        WMObject.__init__(self, config)
        WMConnectionBase.__init__(self, "WMCore.WMBS")

        if couchDbName is None:
            self.dbname = getattr(self.config.JobStateMachine, "couchDBName")
        else:
            self.dbname = couchDbName

        try:
            self.couchdb = CouchServer(self.config.JobStateMachine.couchurl)
            self.jobsdatabase = self.couchdb.connectDatabase("%s/jobs" % self.dbname)
            self.fwjrdatabase = self.couchdb.connectDatabase("%s/fwjrs" % self.dbname)
        except Exception as ex:
            logging.error("Error connecting to couch: %s" % str(ex))
            self.jobsdatabase = None
            self.fwjrdatabase = None

        try:
            self.dashboardReporter = DashboardReporter(config)
        except Exception as ex:
            # The message used to be split with a backslash inside the string
            # literal, which embedded the source indentation in the log line.
            logging.error("Error setting up the dashboard reporter: %s" % str(ex))
            # Mirror the couch handles: leave a defined, empty attribute.
            self.dashboardReporter = None
def setUp(self):
    """
    _setUp_

    Prepare the couch database "resubmitblock_t", switch on the SiteDB
    emulator and define the attributes shared by the tests.
    """
    # External test helpers
    self.testInit = TestInitCouchApp(__file__)
    self.testInit.setLogging()
    self.testInit.setupCouch("resubmitblock_t", "ACDC", "GroupUser")
    EmulatorHelper.setEmulators(siteDB=True)

    # Test environment
    self.couchUrl = os.environ["COUCHURL"]
    self.acdcDBName = 'resubmitblock_t'
    self.validLocations = ['srm-cms.gridpp.rl.ac.uk',
                           'cmssrm.fnal.gov',
                           'srm.unl.edu']
    self.validLocationsCMSNames = ['T2_US_Nebraska',
                                   'T1_US_FNAL',
                                   'T1_UK_RAL']
    self.siteWhitelist = ['T2_XX_SiteA']
    self.workflowName = 'dballest_ReReco_workflow'

    server = CouchServer(dburl=self.couchUrl)
    self.acdcDB = server.connectDatabase(self.acdcDBName, create=False)

    owner = makeUser('unknown', '*****@*****.**', self.couchUrl, self.acdcDBName)
    owner.create()
    return
def setUp(self):
    """
    _setUp_

    Prepare the couch database "resubmitblock_t", switch on the SiteDB
    emulator only, and define the attributes shared by the tests.
    """
    super(ResubmitBlockTest, self).setUp()

    self.group = 'unknown'
    self.user = '******'

    # External test helpers
    self.testInit = TestInitCouchApp(__file__)
    self.testInit.setLogging()
    self.testInit.setupCouch("resubmitblock_t", "ACDC", "GroupUser")
    EmulatorHelper.setEmulators(phedex=False,
                                dbs=False,
                                siteDB=True,
                                requestMgr=False)

    # Test environment
    self.couchUrl = os.environ["COUCHURL"]
    self.acdcDBName = 'resubmitblock_t'
    self.validLocations = ['T2_US_Nebraska', 'T1_US_FNAL', 'T1_UK_RAL']
    self.siteWhitelist = ['T2_XX_SiteA']
    self.workflowName = 'dballest_ReReco_workflow'

    server = CouchServer(dburl=self.couchUrl)
    self.acdcDB = server.connectDatabase(self.acdcDBName, create=False)

    owner = makeUser(self.group, '*****@*****.**', self.couchUrl, self.acdcDBName)
    owner.create()
    return
def resubmitCouchPublication(self, asourl, asodb, proxy, taskname):
    """
    Reset the publication status of every failed publication of a task in
    CouchDB so that it gets resubmitted.

    :param asourl: URL of the CouchDB server.
    :param asodb: name of the database to connect to.
    :param proxy: used both as ``ckey`` and ``cert`` for the connection.
    :param taskname: task whose failed publications are reset.
    :raises Exception: if the connection or the view query fails.  A failed
        update of a single document is only logged.
    """
    couch = CouchServer(dburl=asourl, ckey=proxy, cert=proxy)
    try:
        database = couch.connectDatabase(asodb)
    except Exception as ex:
        raise Exception("Error while trying to connect to CouchDB: %s" % (str(ex)))

    try:
        viewOptions = {'reduce': False, 'startkey': [taskname], 'endkey': [taskname, {}]}
        failedPublications = database.loadView('DBSPublisher',
                                               'PublicationFailedByWorkflow',
                                               viewOptions)['rows']
    except Exception as ex:
        raise Exception("Error while trying to load view 'DBSPublisher.PublicationFailedByWorkflow' from CouchDB: %s" % (str(ex)))

    self.logger.info("There are %d failed publications to resubmit: %s"
                     % (len(failedPublications), failedPublications))

    for row in failedPublications:
        docid = row['id']
        if row['key'][0] == taskname:
            data = {
                'last_update': time.time(),
                'retry': str(datetime.datetime.now()),
                'publication_state': 'not_published',
            }
            try:
                database.updateDocument(docid, 'DBSPublisher', 'updateFile', data)
                self.logger.info("updating document %s " % docid)
            except Exception as ex:
                self.logger.error("Error updating document %s in CouchDB: %s" % (docid, str(ex)))
        else:
            # this should never happen...
            self.logger.warning("Skipping document %s as it seems to correspond to another task: %s"
                                % (docid, row['key'][0]))
    return
def __init__(self, config):
    """
    Initialise class members: logging, the files and config CouchDB
    handles, the process pool and the PhEDEx client.

    :param config: global configuration; its ``AsyncTransfer`` section is
        stored as ``self.config``.
    """
    # Need a better way to test this without turning off this next line
    BaseWorkerThread.__init__(self)

    self.config = config.AsyncTransfer

    # self.logger is set up by the BaseWorkerThread, we just set its level
    try:
        self.logger.setLevel(self.config.log_level)
    except Exception:
        # The inherited logger could not be configured: use the root logger.
        import logging
        self.logger = logging.getLogger()
        self.logger.setLevel(self.config.log_level)
    self.logger.debug("Configuration loaded")

    server = CouchServer(dburl=self.config.couch_instance,
                         ckey=self.config.opsProxy,
                         cert=self.config.opsProxy)
    self.db = server.connectDatabase(self.config.files_database)
    config_server = CouchServer(dburl=self.config.config_couch_instance)
    self.config_db = config_server.connectDatabase(self.config.config_database)
    self.logger.debug("Connected to CouchDB")

    self.pool = Pool(processes=self.config.pool_size)

    try:
        self.phedex = PhEDEx(responseType="xml")
    except Exception as e:
        # A PhEDEx failure is logged only; self.phedex stays unset.
        self.logger.exception("PhEDEx exception: %s" % e)
def saveCouch(self, couchUrl, couchDBName, metadata=None):
    """
    Save this spec in CouchDB as the pickled 'spec' attachment of the
    document named after the spec.

    :param couchUrl: URL of the CouchDB server.
    :param couchDBName: database holding the document.
    :param metadata: body of the document when it has to be created;
        an empty dict is used when falsy.
    :returns: URL of the stored spec.
    :raises CouchInternalServerError: if the attachment call does not
        report ``ok``.
    """
    from WMCore.Database.CMSCouch import CouchServer, CouchInternalServerError

    metadata = metadata or {}
    database = CouchServer(couchUrl).connectDatabase(couchDBName)

    name = self.name()
    uri = '/%s/%s' % (couchDBName, name)
    specuri = uri + '/spec'

    if database.documentExists(name):
        # Attach to the current revision of the existing document.
        rev = database.document(name)['_rev']
    else:
        # Create the document first and attach to the revision it returns.
        self.setSpecUrl(couchUrl + specuri)
        created = database.put(uri, data=metadata, contentType='application/json')
        rev = created['rev']

    retval = database.addAttachment(name, rev, pickle.dumps(self.data), 'spec')
    if retval.get('ok', False) is not True:
        msg = "Failed to save a spec attachment in CouchDB for %s" % name
        raise CouchInternalServerError(msg, data=None, result=retval)

    return couchUrl + specuri
class DQMCouchAPI(WMObject, WMConnectionBase):
    """
    Update the harvesting status of a dataset in CouchDB
    """

    def __init__(self, config, couchDbName=None, couchurl=None):
        """
        Resolve the database name and server URL, then connect.

        :param config: configuration passed to ``WMObject.__init__``.
        :param couchDbName: database name; when None,
            ``HarvestingScheduler.couchDBName`` is used, falling back to
            "dqm_default".
        :param couchurl: server URL; when None,
            ``HarvestingScheduler.couchurl`` is used if set, otherwise
            ``JobStateMachine.couchurl``.

        A connection failure is logged and ``self.database`` is set to None.
        """
        WMObject.__init__(self, config)
        WMConnectionBase.__init__(self, "WMCore.WMBS")
        self.designDoc = "HarvestingDatasets"

        if couchDbName is None:
            self.dbname = getattr(self.config.HarvestingScheduler,
                                  "couchDBName", "dqm_default")
        else:
            self.dbname = couchDbName

        if couchurl is not None:
            self.couchurl = couchurl
        elif getattr(self.config.HarvestingScheduler, "couchurl", None) is not None:
            self.couchurl = self.config.HarvestingScheduler.couchurl
        else:
            self.couchurl = self.config.JobStateMachine.couchurl

        try:
            self.couchdb = CouchServer(self.couchurl)
            # Create the database on first use.
            if self.dbname not in self.couchdb.listDatabases():
                self.createDatabase()
            self.database = self.couchdb.connectDatabase(self.dbname, size=_LIMIT)
        except Exception as ex:
            logging.error("Error connecting to couch: %s" % str(ex))
            self.database = None
        return
def __init__(self, couchURL, summaryLevel):
    """
    Open the "<db>/jobs" and "<db>/fwjrs" databases of the local CouchDB.

    :param couchURL: service URL, split into server base URL and database
        name by ``splitCouchServiceURL``.
    :param summaryLevel: stored unchanged as ``self.summaryLevel``.
    """
    self.couchURL = couchURL
    self.summaryLevel = summaryLevel

    # set the connection for local couchDB call
    baseURL, database = splitCouchServiceURL(couchURL)
    self.couchURLBase = baseURL
    self.dbName = database

    jobsServer = CouchServer(self.couchURLBase)
    self.jobCouchDB = jobsServer.connectDatabase(self.dbName + "/jobs", False)
    fwjrsServer = CouchServer(self.couchURLBase)
    self.fwjrsCouchDB = fwjrsServer.connectDatabase(self.dbName + "/fwjrs", False)
def __init__(self, config, logger):
    """
    Connect to the files database and reset the counters.

    :param config: global configuration; its ``CRABAsyncTransfer`` section
        is stored as ``self.config``.
    :param logger: stored as ``self.logger``.
    """
    self.config = config.CRABAsyncTransfer
    self.db = CouchServer(self.config.couch_instance).connectDatabase(
        self.config.files_database)
    self.logger = logger
    self.size, self.result = 0, []
def setUp(self):
    """
    _setUp_

    Set up the relational schema, the "promptreco_t" couch database, a
    work directory and the WMBS DAOs used by the tests.
    """
    self.testInit = TestInitCouchApp(__file__)
    self.testInit.setLogging()
    self.testInit.setDatabaseConnection()
    self.testInit.setupCouch("promptreco_t", "ConfigCache")
    self.testInit.setSchema(customModules=["WMCore.WMBS"], useDefault=False)

    self.configDatabase = CouchServer(os.environ["COUCHURL"]).connectDatabase("promptreco_t")
    self.testDir = self.testInit.generateWorkDir()

    # The DAO factory uses the logger and DB interface of the current thread.
    currentThread = threading.currentThread()
    self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                 logger=currentThread.logger,
                                 dbinterface=currentThread.dbi)
    self.listTasksByWorkflow = self.daoFactory(classname="Workflow.LoadFromName")
    self.listFilesets = self.daoFactory(classname="Fileset.List")
    self.listSubsMapping = self.daoFactory(
        classname="Subscriptions.ListSubsAndFilesetsFromWorkflow")
    return
def setUp(self):
    """
    _setUp_

    Prepare the couch database "resubmitblock_t", translate the valid
    locations through SiteDB and define the attributes shared by the tests.
    """
    super(ResubmitBlockTest, self).setUp()

    self.group = 'unknown'
    self.user = '******'

    # External test helpers
    self.testInit = TestInitCouchApp(__file__)
    self.testInit.setLogging()
    self.testInit.setupCouch("resubmitblock_t", "ACDC", "GroupUser")

    # Test environment
    self.couchUrl = os.environ["COUCHURL"]
    self.acdcDBName = 'resubmitblock_t'
    self.validLocations = ['T2_US_Nebraska', 'T1_US_FNAL_Disk', 'T1_UK_RAL_Disk']
    self.siteWhitelist = ['T2_XX_SiteA']

    # Convert phedex node name to a valid processing site name
    self.PSNs = SiteDB().PNNstoPSNs(self.validLocations)
    self.workflowName = 'dballest_ReReco_workflow'

    server = CouchServer(dburl=self.couchUrl)
    self.acdcDB = server.connectDatabase(self.acdcDBName, create=False)

    owner = makeUser(self.group, '*****@*****.**', self.couchUrl, self.acdcDBName)
    owner.create()
    return
def __init__(self, config):
    """
    Initialise class members: the dropbox directory, the files and config
    CouchDB handles, the process pool, the PhEDEx client and the plugin
    factory.

    :param config: configuration handed to ``BaseDaemon.__init__``.
    :raises OSError: if the dropbox directory cannot be created for a
        reason other than it already existing.
    """
    # Need a better way to test this without turning off this next line
    BaseDaemon.__init__(self, config, 'AsyncTransfer')

    self.dropbox_dir = '%s/dropbox/outputs' % self.config.componentDir
    if not os.path.isdir(self.dropbox_dir):
        try:
            os.makedirs(self.dropbox_dir)
        except OSError as e:
            # The directory may have appeared since the isdir() check.
            if e.errno != errno.EEXIST:
                # The message needs a placeholder: without one the %
                # operator raises TypeError and hides the original OSError.
                self.logger.error('Unknown error in mkdir: %s' % e.errno)
                raise

    server = CouchServer(dburl=self.config.couch_instance,
                         ckey=self.config.opsProxy,
                         cert=self.config.opsProxy)
    self.db = server.connectDatabase(self.config.files_database)
    config_server = CouchServer(dburl=self.config.config_couch_instance)
    self.config_db = config_server.connectDatabase(self.config.config_database)
    self.logger.debug('Connected to CouchDB')

    self.pool = Pool(processes=self.config.pool_size)

    try:
        self.phedex = PhEDEx(responseType='xml',
                             dict={'key': self.config.opsProxy,
                                   'cert': self.config.opsProxy})
    except Exception as e:
        # A PhEDEx failure is logged only; self.phedex stays unset.
        self.logger.exception('PhEDEx exception: %s' % e)

    # Set up a factory for loading plugins
    self.factory = WMFactory(self.config.schedAlgoDir,
                             namespace=self.config.schedAlgoDir)

    # NOTE(review): these two names are never read inside this method;
    # kept as found - confirm whether they are still needed.
    result_list = []
    current_running = []
class WMStatsReader():
    """
    Read-only access to the "WMStats" views of a couch database.
    """

    def __init__(self, couchURL, dbName=None):
        """
        :param couchURL: server URL, or a full service URL when ``dbName``
            is not given; it is passed through ``sanitizeURL`` first.
        :param dbName: database name; when falsy it is split off
            ``couchURL`` by ``splitCouchServiceURL``.
        """
        cleanURL = sanitizeURL(couchURL)['url']
        # set the connection for local couchDB call
        if not dbName:
            self.couchURL, self.dbName = splitCouchServiceURL(cleanURL)
        else:
            self.couchURL = cleanURL
            self.dbName = dbName
        self.couchServer = CouchServer(self.couchURL)
        self.couchDB = CouchServer(self.couchURL).connectDatabase(self.dbName, False)

    def workflowsByStatus(self, statusList):
        """
        Return the ids of the rows of the "requestByStatus" view for the
        given status keys, queried with ``stale=update_after``.
        """
        view = self.couchDB.loadView("WMStats", "requestByStatus",
                                     {"stale": "update_after"}, statusList)
        return [row["id"] for row in view["rows"]]

    def replicate(self, target):
        """Start a continuous replication of this database to ``target``."""
        self.couchServer.replicate(self.dbName, target, continuous=True)
def __init__(self, config):
    """
    Initialise logging, the plugin factory and the files CouchDB handle.

    :param config: global configuration; its ``AsyncTransfer`` section is
        stored as ``self.config``.
    """
    BaseWorkerThread.__init__(self)
    self.config = config.AsyncTransfer

    # self.logger is set up by the BaseWorkerThread, we just set its level
    try:
        self.logger.setLevel(self.config.log_level)
    except Exception:
        # The inherited logger could not be configured: use the root logger.
        # A bare "except:" would also trap KeyboardInterrupt/SystemExit.
        import logging
        self.logger = logging.getLogger()
        self.logger.setLevel(self.config.log_level)
    self.logger.debug('Configuration loaded')

    # Set up a factory for loading plugins
    self.factory = WMFactory(self.config.pluginDir,
                             namespace=self.config.pluginDir)

    # Asynch db
    server = CouchServer(dburl=self.config.couch_instance,
                         ckey=self.config.opsProxy,
                         cert=self.config.opsProxy)
    self.db = server.connectDatabase(self.config.files_database)
    self.logger.debug('Connected to CouchDB')
    return
class WMStatsReader():
    """
    Read-only access to the "WMStats" views of a couch database.
    """

    def __init__(self, couchURL, dbName=None):
        """
        :param couchURL: server URL, or a full service URL when ``dbName``
            is not given; it is passed through ``sanitizeURL`` first.
        :param dbName: database name; when falsy it is split off
            ``couchURL`` by ``splitCouchServiceURL``.
        """
        couchURL = sanitizeURL(couchURL)['url']
        # set the connection for local couchDB call
        if dbName:
            self.couchURL = couchURL
            self.dbName = dbName
        else:
            self.couchURL, self.dbName = splitCouchServiceURL(couchURL)
        self.couchServer = CouchServer(self.couchURL)
        self.couchDB = CouchServer(self.couchURL).connectDatabase(self.dbName, False)

    def workflowsByStatus(self, statusList, format="list", stale="update_after"):
        """
        Return the workflow ids found by the "requestByStatus" view.

        :param statusList: keys handed to the view.
        :param format: "dict" returns ``{id: None}``; any other value
            returns a list of ids.
        :param stale: value of the ``stale`` view option; the option is
            omitted when this is falsy.
        """
        options = {"stale": stale} if stale else {}
        result = self.couchDB.loadView("WMStats", "requestByStatus", options, statusList)

        if format == "dict":
            return dict((item["id"], None) for item in result["rows"])
        return [item["id"] for item in result["rows"]]

    def workflowStatus(self, stale="update_after"):
        """
        _workflowStatus_

        Return a dictionary with all available workflows,
        grouped by status and with the timestamp of the status

        :param stale: value of the ``stale`` view option; the option is
            omitted when this is falsy.
        :returns: ``{row key: {row id: row value}}``.
        """
        options = {"stale": stale} if stale else {}
        result = self.couchDB.loadView("WMStats", "requestByStatus", options)

        stateDict = {}
        for item in result['rows']:
            stateDict.setdefault(item["key"], {})[item["id"]] = item["value"]
        return stateDict

    def getDBInstance(self):
        """Return the database handle."""
        return self.couchDB

    def getHeartbeat(self):
        """
        Return ``couchDB.info()``, or ``{'error_message': ...}`` if the
        call raises.
        """
        try:
            return self.couchDB.info()
        except Exception as ex:
            return {'error_message': str(ex)}
def __init__(self, config):
    """
    Connect to the configured database, creating it first when the server
    does not list it.

    :param config: configuration object providing ``url`` and ``database``.
    """
    self.config = config
    couch = CouchServer(self.config.url)
    # Create the configured database if it does not exist yet.
    if self.config.database not in couch.listDatabases():
        couch.createDatabase(self.config.database)
    self.database = Database(self.config.database, self.config.url)
    logging.debug("%s initialized." % self.__class__.__name__)
def __init__(self, couchURL, statSummaryDB, summaryLevel):
    """
    Open the "<db>/jobs" database, the FWJR API on "<db>/fwjrs" and the
    summary statistics database of the local CouchDB.

    :param couchURL: service URL, split into server base URL and database
        name by ``splitCouchServiceURL``.
    :param statSummaryDB: name of the summary statistics database.
    :param summaryLevel: stored unchanged as ``self.summaryLevel``.
    """
    self.couchURL = couchURL
    self.summaryLevel = summaryLevel

    # set the connection for local couchDB call
    baseURL, database = splitCouchServiceURL(couchURL)
    self.couchURLBase = baseURL
    self.dbName = database

    jobsServer = CouchServer(self.couchURLBase)
    self.jobCouchDB = jobsServer.connectDatabase(self.dbName + "/jobs", False)
    self.fwjrAPI = FWJRDBAPI(self.couchURLBase, "%s/fwjrs" % self.dbName)
    summaryServer = CouchServer(self.couchURLBase)
    self.summaryStatsDB = summaryServer.connectDatabase(statSummaryDB, False)
def setUp(self):
    """
    _setUp_

    Create the relational schema, every couch database used by the work
    queue tests and a work directory.  A cleanup is registered right after
    each resource is created.
    """
    super(WorkQueueTestCase, self).setUp()

    # Names of the couch databases used by the tests
    self.queueDB = 'workqueue_t'
    self.queueInboxDB = 'workqueue_t_inbox'
    self.globalQDB = 'workqueue_t_global'
    self.globalQInboxDB = 'workqueue_t_global_inbox'
    self.localQDB = 'workqueue_t_local'
    self.localQInboxDB = 'workqueue_t_local_inbox'
    self.localQDB2 = 'workqueue_t_local2'
    self.localQInboxDB2 = 'workqueue_t_local2_inbox'
    self.configCacheDB = 'workqueue_t_config_cache'
    self.logDBName = 'logdb_t'
    self.requestDBName = 'workqueue_t_reqmgr_workload_cache'

    self.setSchema()

    # Relational database
    self.testInit = TestInit('WorkQueueTest')
    self.testInit.setLogging()
    self.testInit.setDatabaseConnection(destroyAllDatabase=True)
    self.addCleanup(self.testInit.clearDatabase)
    self.addCleanup(logging.debug, 'Cleanup called clearDatabase()')
    self.testInit.setSchema(customModules=self.schema, useDefault=False)

    # Couch databases carrying the work queue couchapps
    workQueueDBs = (self.queueDB, self.queueInboxDB,
                    self.globalQDB, self.globalQInboxDB,
                    self.localQDB, self.localQInboxDB,
                    self.localQDB2, self.localQInboxDB2)
    for couchDBName in workQueueDBs:
        self.testInit.setupCouch(couchDBName, *self.couchApps)

    # Couch databases carrying a single dedicated couchapp
    dedicatedDBs = ((self.configCacheDB, 'ConfigCache'),
                    (self.logDBName, 'LogDB'),
                    (self.requestDBName, 'ReqMgr'))
    for couchDBName, couchApp in dedicatedDBs:
        self.testInit.setupCouch(couchDBName, couchApp)

    self.couchURL = os.environ.get("COUCHURL")
    self.configCacheDBInstance = CouchServer(self.couchURL).connectDatabase(self.configCacheDB)

    # Start from a server without replicator documents
    self.localCouchMonitor = CouchMonitor(self.couchURL)
    self.localCouchMonitor.deleteReplicatorDocs()
    self.addCleanup(self.localCouchMonitor.deleteReplicatorDocs)
    self.addCleanup(logging.debug, 'Cleanup called deleteReplicatorDocs()')

    self.addCleanup(self.testInit.tearDownCouch)
    self.addCleanup(logging.debug, 'Cleanup called tearDownCouch()')

    self.workDir = self.testInit.generateWorkDir()
    self.addCleanup(self.testInit.delWorkDir)
    self.addCleanup(logging.debug, 'Cleanup called delWorkDir()')
    return
def __init__(self, couchURL, dbName=None):
    """
    Create the server and database handles for the local CouchDB.

    :param couchURL: server URL, or a full service URL when ``dbName`` is
        not given; it is passed through ``sanitizeURL`` first.
    :param dbName: database name; when falsy it is split off ``couchURL``
        by ``splitCouchServiceURL``.
    """
    cleanURL = sanitizeURL(couchURL)['url']
    # set the connection for local couchDB call
    if not dbName:
        self.couchURL, self.dbName = splitCouchServiceURL(cleanURL)
    else:
        self.couchURL = cleanURL
        self.dbName = dbName
    self.couchServer = CouchServer(self.couchURL)
    self.couchDB = CouchServer(self.couchURL).connectDatabase(self.dbName, False)
def testE_multicore(self):
    """
    _multicore_

    Create a workload summary based on the multicore job report and check
    that the WMBS tables are emptied and the performance averages stored.
    """
    myThread = threading.currentThread()

    config = self.getConfig()
    workloadPath = os.path.join(self.testDir, "specDir", "spec.pkl")
    workload = self.createWorkload(workloadName=workloadPath)
    # The returned job group is not inspected by this test.
    self.createTestJobGroup(
        config=config, name=workload.name(), specLocation=workloadPath,
        error=False, multicore=True
    )

    cachePath = os.path.join(config.JobCreator.jobCacheDir, "TestWorkload", "ReReco")
    os.makedirs(cachePath)
    self.assertTrue(os.path.exists(cachePath))

    dbname = config.TaskArchiver.workloadSummaryCouchDBName
    couchdb = CouchServer(config.JobStateMachine.couchurl)
    workdatabase = couchdb.connectDatabase(dbname)

    testTaskArchiver = TaskArchiverPoller(config=config)
    testTaskArchiver.algorithm()

    result = myThread.dbi.processData("SELECT * FROM wmbs_job")[0].fetchall()
    self.assertEqual(len(result), 0, "No job should have survived")
    result = myThread.dbi.processData("SELECT * FROM wmbs_subscription")[0].fetchall()
    self.assertEqual(len(result), 0)
    result = myThread.dbi.processData("SELECT * FROM wmbs_jobgroup")[0].fetchall()
    self.assertEqual(len(result), 0)
    result = myThread.dbi.processData("SELECT * FROM wmbs_file_details")[0].fetchall()
    self.assertEqual(len(result), 0)

    workloadSummary = workdatabase.document(id="TestWorkload")
    stepPerformance = workloadSummary["performance"]["/TestWorkload/ReReco"]["cmsRun1"]

    # assertAlmostEquals is a deprecated alias that no longer exists in
    # recent Python versions; assertAlmostEqual is the supported name.
    self.assertAlmostEqual(
        stepPerformance["minMergeTime"]["average"], 5.7624950408900002, places=2
    )
    self.assertAlmostEqual(
        stepPerformance["numberOfMerges"]["average"], 3.0, places=2
    )
    self.assertAlmostEqual(
        stepPerformance["averageProcessTime"]["average"], 29.369966666700002, places=2
    )
    return
def testJ_Resubmission(self):
    """
    _Resubmission_

    Submit a ReReco request, check that a Resubmission request without
    InitialTaskPath is rejected with status 400, then submit a complete
    one and check it is returned as child of the original request.
    """
    userName = '******'
    groupName = 'Li'
    teamName = 'Tang'
    couchUrl = os.environ.get("COUCHURL")

    # Original request
    schema = utils.getAndSetupSchema(self,
                                     userName=userName,
                                     groupName=groupName,
                                     teamName=teamName)
    schema['RequestType'] = "ReReco"
    schema["ConfigCacheID"] = self.createConfig()
    schema["CouchDBName"] = self.couchDBName
    schema["CouchURL"] = couchUrl
    schema["CouchWorkloadDBName"] = self.couchDBName

    requestName = self.jsonSender.put('request', schema)[0]['RequestName']

    # user, group schema already set up
    schema = utils.getSchema(groupName=groupName, userName=userName)
    schema['RequestType'] = "Resubmission"
    schema["CouchWorkloadDBName"] = self.couchDBName

    # Without InitialTaskPath the request has to be refused.
    raises = False
    try:
        self.jsonSender.put('request', schema)
    except HTTPException as ex:
        raises = True
        self.assertEqual(ex.status, 400)
        self.assertTrue("Error in Workload Validation: Validation failed: InitialTaskPath is mendatory" in ex.result)
    self.assertTrue(raises)

    schema["InitialTaskPath"] = '/%s/DataProcessing' % requestName
    schema["ACDCServer"] = couchUrl
    schema["ACDCDatabase"] = self.couchDBName
    schema["CollectionName"] = "SomeOtherName"

    # Here we just make sure that real result goes through
    resubmitName = self.jsonSender.put('request', schema)[0]['RequestName']
    self.jsonSender.get('request/%s' % resubmitName)

    reqmgrCouch = CouchServer(self.testInit.couchUrl).connectDatabase(self.couchDBName)
    children = reqmgrCouch.loadView('ReqMgr', 'childresubmissionrequests',
                                    {}, [requestName])['rows']
    self.assertEqual(len(children), 1)
    self.assertEqual(children[0]['key'], requestName)
    self.assertEqual(children[0]['id'], resubmitName)
def setUp(self):
    """
    _setUp_

    Set up the relational schema, the "taskchain_t" couch database and a
    work directory, and locate the multicore TaskChain request template.
    """
    self.testInit = TestInitCouchApp(__file__)
    self.testInit.setLogging()
    self.testInit.setDatabaseConnection()
    self.testInit.setupCouch("taskchain_t", "ConfigCache")
    self.testInit.setSchema(customModules=["WMCore.WMBS"], useDefault=False)

    self.configDatabase = CouchServer(os.environ["COUCHURL"]).connectDatabase("taskchain_t")
    self.testInit.generateWorkDir()

    self.workload = None
    self.differentNCores = getTestFile(
        'data/ReqMgr/requests/Integration/TaskChain_Task_Multicore.json')
    return
class CouchHandler(logging.handlers.HTTPHandler):
    """
    Logging handler that stores each log record as a CouchDB document.
    """

    def __init__(self, host, database):
        """
        :param host: CouchDB server URL, also handed to the HTTP handler.
        :param database: database name, also used as the handler URL.
        """
        # Call the base class through the same qualified name used in the
        # class statement, so no separate HTTPHandler import is needed.
        logging.handlers.HTTPHandler.__init__(self, host, database, 'POST')
        from WMCore.Database.CMSCouch import CouchServer
        self.database = CouchServer(dburl=host).connectDatabase(database, size=10)

    def emit(self, record):
        """
        Write a document to CouchDB representing the log message.

        :param record: the ``logging.LogRecord`` to store.
        """
        # NOTE(review): record.msg is the message before its arguments are
        # merged in; confirm that storing it unformatted is intended.
        doc = {
            'message': record.msg,
            'threadName': record.threadName,
            'name': record.name,
            'created': record.created,
            'process': record.process,
            'levelno': record.levelno,
            'lineno': record.lineno,
            'processName': record.processName,
            'levelname': record.levelname,
        }
        self.database.commitOne(doc, timestamp=True)
def __init__(self, config):
    """
    Initialise class members: logging and the files CouchDB handle.

    :param config: global configuration; its ``RetryManager`` section is
        stored as ``self.config``.
    :raises Exception: whatever the CouchDB connection raised, after it
        has been logged.
    """
    BaseWorkerThread.__init__(self)
    self.config = config.RetryManager
    try:
        self.logger.setLevel(self.config.log_level)
    except Exception:
        # The inherited logger could not be configured: use the root logger.
        import logging
        self.logger = logging.getLogger()
        self.logger.setLevel(self.config.log_level)
    self.logger.debug('Configuration loaded')

    try:
        server = CouchServer(dburl=self.config.couch_instance,
                             ckey=self.config.opsProxy,
                             cert=self.config.opsProxy)
        self.db = server.connectDatabase(self.config.files_database)
    except Exception as e:
        self.logger.exception(
            'A problem occured when connecting to couchDB: %s' % e)
        raise
def _setUp(self):
    """
    Create the CouchServer reference and probe the connection.

    The probe sends self._query once; its result is discarded. Any
    failure, including a missing 'couchURL' configuration value, is
    re-raised as an Exception whose message is prefixed with the class
    name. Afterwards config.observables is normalised to a list or tuple.
    """
    try:
        couchURL = getattr(self.config, "couchURL", None)
        if not couchURL:
            raise Exception(
                "Configuration value 'couchURL' missing, can't connect to CouchDB."
            )
        self.couch = CouchServer(couchURL)
        # first connect and retrieve attempt; the answer itself is unused
        self.couch.makeRequest(self._query)
    except Exception as ex:
        raise Exception("%s: could not connect to CouchDB, reason: %s" %
                        (self.__class__.__name__, ex))

    # observables shall be list-like integers
    if isinstance(self.config.observables, (list, tuple)):
        return
    self.config.observables = (self.config.observables,)
def setUp(self):
    """
    Connect to the CouchDB server named by COUCHURL, remove rotating-db
    test databases left over from earlier runs and create the
    RotatingDatabase instance under test.
    """
    self.couchURL = os.getenv("COUCHURL")
    self.server = CouchServer(self.couchURL)

    # Kill off any databases left over from previous runs.
    # In python 3 the variables defined inside a comprehension are deleted
    # outside the comprehension. See: pylint W1662 comprehension-escape
    dbs = [db for db in self.server.listDatabases()
           if db.startswith('rotdb_unittest_')]
    for db in dbs:
        try:
            self.server.deleteDatabase(db)
        except Exception:
            # Best-effort cleanup: a failed delete must not fail the test,
            # but KeyboardInterrupt/SystemExit are no longer swallowed.
            pass

    # Database names are derived from the name of the running test
    testname = self.id().split('.')[-1].lower()
    self.dbname = 'rotdb_unittest_%s' % testname
    self.arcname = 'rotdb_unittest_%s_archive' % testname
    self.seedname = 'rotdb_unittest_%s_seedcfg' % testname

    # set a long value for times, tests do operations explicitly
    self.timing = {'archive': timedelta(seconds=1),
                   'expire': timedelta(seconds=2)}
    self.db = RotatingDatabase(dbname=self.dbname,
                               url=self.couchURL,
                               archivename=self.arcname,
                               timing=self.timing)
class CouchAppTestHarness:
    """
    Test Harness for installing a couch database instance with several
    couchapps in a unittest.setUp and wiping it out in a unittest.tearDown
    """

    def __init__(self, dbName, couchUrl=None):
        """
        dbName   -- name of the database managed by this harness
        couchUrl -- fallback server URL, used only when the COUCHURL
                    environment variable is not set

        Raises RuntimeError when no URL is available or when the URL ends
        with a slash.
        """
        self.couchUrl = os.environ.get("COUCHURL", couchUrl)
        self.dbName = dbName
        if self.couchUrl is None:
            msg = "COUCHURL env var not set..."
            raise RuntimeError(msg)
        if self.couchUrl.endswith('/'):
            raise RuntimeError("COUCHURL env var shouldn't end with /")
        self.couchServer = CouchServer(self.couchUrl)
        self.couchappConfig = Config()

    def create(self, dropExistingDb=True):
        """
        create couch db instance

        An existing database of the same name is dropped first, unless
        dropExistingDb is False, in which case it is left untouched.
        """
        if self.dbName in self.couchServer.listDatabases():
            if not dropExistingDb:
                return
            self.drop()
        self.couchServer.createDatabase(self.dbName)

    def drop(self):
        """blow away the couch db instance"""
        self.couchServer.deleteDatabase(self.dbName)

    def pushCouchapps(self, *couchappdirs):
        """
        push a list of couchapps to the database
        """
        # The database name is URL-quoted; the target does not change
        # between iterations, so it is built once.
        target = "%s/%s" % (self.couchUrl, urllib.quote_plus(self.dbName))
        for couchappdir in couchappdirs:
            couchapppush(self.couchappConfig, couchappdir, target)
def setUp(self):
    """
    Set up logging, the WMBS schema and the couch databases used by the
    test (workload summary, job dump, FWJR dump, summary stats), then
    connect to them and create the ChangeState object and a work dir.
    """
    # (the previously unused `myThread` local has been removed)
    self.testInit = TestInit(__file__)
    self.testInit.setLogging()
    self.testInit.setDatabaseConnection()
    self.testInit.setSchema(customModules=["WMCore.WMBS"], useDefault=False)
    self.databaseName = "couchapp_t_0"

    # Setup config for couch connections
    config = self.testInit.getConfiguration()
    jobsDBName = "%s/jobs" % config.JobStateMachine.couchDBName
    fwjrsDBName = "%s/fwjrs" % config.JobStateMachine.couchDBName
    self.testInit.setupCouch(self.databaseName, "WorkloadSummary")
    self.testInit.setupCouch(jobsDBName, "JobDump")
    self.testInit.setupCouch(fwjrsDBName, "FWJRDump")
    self.testInit.setupCouch(config.JobStateMachine.summaryStatsDBName,
                             "SummaryStats")

    # Create couch server and connect to databases
    self.couchdb = CouchServer(config.JobStateMachine.couchurl)
    self.jobsdatabase = self.couchdb.connectDatabase(jobsDBName)
    self.fwjrdatabase = self.couchdb.connectDatabase(fwjrsDBName)
    self.statsumdatabase = self.couchdb.connectDatabase(
        config.JobStateMachine.summaryStatsDBName)

    # Create changeState
    self.changeState = ChangeState(config)
    self.config = config

    # Create testDir
    self.testDir = self.testInit.generateWorkDir()
    return
def __init__(self, dbURL, couchDBName = None, id = None, rev = None, usePYCurl = False,
             ckey = None, cert = None, capath = None, detail = True):
    """
    Connect to the couch server at dbURL and to the couchDBName database,
    creating the database when the server does not list it, then set up
    an empty "config" document.

    id  -- optional couch document ID stored as the document's '_id'
    rev -- accepted but not used by this constructor

    Raises ConfigCacheException when the couch connection fails.
    """
    self.dbname = couchDBName
    self.dburl = dbURL
    self.detail = detail
    try:
        self.couchdb = CouchServer(self.dburl, usePYCurl=usePYCurl,
                                   ckey=ckey, cert=cert, capath=capath)
        if self.dbname not in self.couchdb.listDatabases():
            self.createDatabase()
        self.database = self.couchdb.connectDatabase(self.dbname)
    except Exception as ex:
        msg = "Error connecting to couch: %s\n" % str(ex)
        msg += str(traceback.format_exc())
        logging.error(msg)
        raise ConfigCacheException(message = msg)

    # local cache
    self.docs_cache = DocumentCache(self.database, self.detail)

    # UserGroup variables
    self.group = None
    self.owner = None

    # Internal data structure
    self.document = Document()
    self.attachments = {}
    self.document['type'] = "config"
    self.document['description'] = {}
    self.document['description']['config_label'] = None
    self.document['description']['config_desc'] = None

    # Identity test: only a missing ID (None) is skipped
    if id is not None:
        self.document['_id'] = id
    self.document['pset_tweak_details'] = None
    self.document['info'] = None
    self.document['config'] = None
    return
def setUp(self):
    """
    _setUp_

    Define the names of the work queue test databases, set up logging and
    the SQL schema, install the couchapps into every queue database and
    the ConfigCache app into the config cache database, then create a
    work directory.
    """
    self.queueDB = 'workqueue_t'
    self.queueInboxDB = 'workqueue_t_inbox'
    self.globalQDB = 'workqueue_t_global'
    self.globalQInboxDB = 'workqueue_t_global_inbox'
    self.localQDB = 'workqueue_t_local'
    self.localQInboxDB = 'workqueue_t_local_inbox'
    self.localQDB2 = 'workqueue_t_local2'
    self.localQInboxDB2 = 'workqueue_t_local2_inbox'
    self.configCacheDB = 'workqueue_t_config_cache'

    self.setSchema()
    self.testInit = TestInit('WorkQueueTest')
    self.testInit.setLogging()
    self.testInit.setDatabaseConnection()
    self.testInit.setSchema(customModules = self.schema, useDefault = False)

    # Every queue database and its inbox receive the same couchapps.
    queueDatabases = (self.queueDB, self.queueInboxDB,
                      self.globalQDB, self.globalQInboxDB,
                      self.localQDB, self.localQInboxDB,
                      self.localQDB2, self.localQInboxDB2)
    for dbName in queueDatabases:
        self.testInit.setupCouch(dbName, *self.couchApps)
    self.testInit.setupCouch(self.configCacheDB, 'ConfigCache')

    server = CouchServer(os.environ.get("COUCHURL"))
    self.configCacheDBInstance = server.connectDatabase(self.configCacheDB)

    self.workDir = self.testInit.generateWorkDir()
    return
def __init__(self, db_url, db_name='workqueue', inbox_name=None,
             parentQueue=None, queueUrl=None, logger=None):
    """
    Connect to the work queue database and its inbox on one couch server.

    db_url      -- couch server URL (kept as-is in self.hostWithAuth)
    db_name     -- name of the queue database
    inbox_name  -- name of the inbox database, "<db_name>_inbox" if None
    parentQueue -- URL of the parent queue, if any; a sanitized copy is
                   kept in self.parentCouchUrl
    queueUrl    -- URL of this queue; defaults to db_url + '/' + db_name
    logger      -- logger to use, the logging module itself if not given

    Both databases are connected with create=False.
    """
    if logger:
        self.logger = logger
    else:
        import logging
        self.logger = logging

    # Identity test: only a missing name (None) triggers the default
    if inbox_name is None:
        inbox_name = "%s_inbox" % db_name

    self.server = CouchServer(db_url)
    self.parentCouchUrlWithAuth = parentQueue
    if parentQueue:
        self.parentCouchUrl = sanitizeURL(parentQueue)['url']
    else:
        self.parentCouchUrl = None
    self.db = self.server.connectDatabase(db_name, create=False, size=10000)
    self.hostWithAuth = db_url
    self.inbox = self.server.connectDatabase(inbox_name, create=False, size=10000)
    self.queueUrl = sanitizeURL(queueUrl or (db_url + '/' + db_name))['url']
    self.eleKey = 'WMCore.WorkQueue.DataStructs.WorkQueueElement.WorkQueueElement'
def __init__(self, config):
    """
    Initialise class members.

    Reads the DBSPublisher section of `config`, sets the log level,
    connects to the files database, and prepares the plugin factory and
    the publication worker pool.
    """
    # Need a better way to test this without turning off this next line
    BaseWorkerThread.__init__(self)

    self.config = config.DBSPublisher
    try:
        # self.logger is expected to be set up by BaseWorkerThread, only
        # its level is adjusted here
        self.logger.setLevel(self.config.log_level)
    except AttributeError:
        # No usable self.logger: fall back to the root logger.
        import logging
        self.logger = logging.getLogger()
        self.logger.setLevel(self.config.log_level)
    self.logger.debug('Configuration loaded')

    server = CouchServer(dburl=self.config.couch_instance,
                         ckey=self.config.opsProxy,
                         cert=self.config.opsProxy)
    self.db = server.connectDatabase(self.config.files_database)
    self.logger.debug('Connected to CouchDB')

    # Set up a factory for loading plugins
    self.factory = WMFactory(self.config.schedAlgoDir,
                             namespace = self.config.schedAlgoDir)
    self.pool = Pool(processes=self.config.publication_pool_size)
def __init__(self, config, couchDbName=None):
    """
    Set up the couch connections, the dashboard reporter and the DAOs.

    config      -- configuration object; its JobStateMachine section is
                   read for the couch URL, database name and upload limit
    couchDbName -- database name overriding JobStateMachine.couchDBName

    An error while creating the dashboard reporter is logged and
    re-raised.
    """
    WMObject.__init__(self, config)
    WMConnectionBase.__init__(self, "WMCore.WMBS")

    # Identity test: only a missing name (None) selects the configured one
    if couchDbName is None:
        self.dbname = getattr(self.config.JobStateMachine, "couchDBName")
    else:
        self.dbname = couchDbName

    # Database handles; assigned None here before _connectDatabases() runs
    self.jobsdatabase = None
    self.fwjrdatabase = None
    self.jsumdatabase = None
    self.statsumdatabase = None

    self.couchdb = CouchServer(self.config.JobStateMachine.couchurl)
    self._connectDatabases()

    try:
        self.dashboardReporter = DashboardReporter(config)
    except Exception as ex:
        logging.error("Error setting up the dashboard reporter: %s", str(ex))
        raise

    self.getCouchDAO = self.daofactory("Jobs.GetCouchID")
    self.setCouchDAO = self.daofactory("Jobs.SetCouchID")
    self.incrementRetryDAO = self.daofactory("Jobs.IncrementRetry")
    self.workflowTaskDAO = self.daofactory("Jobs.GetWorkflowTask")
    self.jobTypeDAO = self.daofactory("Jobs.GetType")
    self.updateLocationDAO = self.daofactory("Jobs.UpdateLocation")
    self.getWorkflowSpecDAO = self.daofactory(
        "Workflow.GetSpecAndNameFromTask")

    self.maxUploadedInputFiles = getattr(self.config.JobStateMachine,
                                         'maxFWJRInputFiles', 1000)
    self.workloadCache = {}
    return
def __init__(self, couchURL, statSummaryDB, summaryLevel):
    """
    Connect to the local jobs, fwjrs and summary-stats couch databases.

    couchURL      -- couch service URL; split into server base URL and
                     database name by splitCouchServiceURL
    statSummaryDB -- name of the summary statistics database
    summaryLevel  -- stored unchanged in self.summaryLevel
    """
    # set the connection for local couchDB call
    self.couchURL = couchURL
    self.couchURLBase, self.dbName = splitCouchServiceURL(couchURL)

    # All three databases live on the same server, so a single
    # CouchServer object is created and reused for each connection.
    server = CouchServer(self.couchURLBase)
    self.jobCouchDB = server.connectDatabase(self.dbName + "/jobs", False)
    self.fwjrsCouchDB = server.connectDatabase(self.dbName + "/fwjrs", False)
    #TODO: remove the hard coded name (wma_summarydb)
    self.summaryStatsDB = server.connectDatabase(statSummaryDB, False)
    self.summaryLevel = summaryLevel
def setup(self, parameters):
    """
    Called at startup

    Reads the TaskArchiver, AnalyticsDataCollector and JobStateMachine
    configuration and creates the WMStats writer, the request DB
    reader/writer, the ReqMgr service handles (only when ReqMgr is used
    for the completion check) and the local job couch connections.
    `parameters` is not used.
    """
    archiverConfig = self.config.TaskArchiver
    analyticsConfig = self.config.AnalyticsDataCollector
    jsmConfig = self.config.JobStateMachine

    self.teamName = self.config.Agent.teamName

    # set the connection for local couchDB call
    self.useReqMgrForCompletionCheck = getattr(archiverConfig,
                                               'useReqMgrForCompletionCheck', True)
    self.archiveDelayHours = getattr(archiverConfig, 'archiveDelayHours', 0)
    self.wmstatsCouchDB = WMStatsWriter(archiverConfig.localWMStatsURL,
                                        "WMStatsAgent")

    #TODO: we might need to use local db for Tier0
    self.centralRequestDBReader = RequestDBReader(
        analyticsConfig.centralRequestDBURL,
        couchapp=analyticsConfig.RequestCouchApp)

    if not self.useReqMgrForCompletionCheck:
        # Tier0 case
        self.deletableState = "completed"
        # use local for update
        self.centralRequestDBWriter = RequestDBWriter(
            analyticsConfig.localT0RequestDBURL,
            couchapp=analyticsConfig.RequestCouchApp)
    else:
        self.deletableState = "announced"
        self.centralRequestDBWriter = RequestDBWriter(
            analyticsConfig.centralRequestDBURL,
            couchapp=analyticsConfig.RequestCouchApp)
        self.reqmgr2Svc = ReqMgr(archiverConfig.ReqMgr2ServiceURL)
        #TODO: remove this when reqmgr2 replace reqmgr completely (reqmgr2Only)
        self.reqmgrSvc = RequestManager({'endpoint': archiverConfig.ReqMgrServiceURL})

    # Local job databases, reached through the sanitized couch URL
    jobDBurl = sanitizeURL(jsmConfig.couchurl)['url']
    jobDBName = jsmConfig.couchDBName
    self.jobCouchdb = CouchServer(jobDBurl)
    self.jobsdatabase = self.jobCouchdb.connectDatabase("%s/jobs" % jobDBName)
    self.fwjrdatabase = self.jobCouchdb.connectDatabase("%s/fwjrs" % jobDBName)
    self.statsumdatabase = self.jobCouchdb.connectDatabase(jsmConfig.summaryStatsDBName)
def __init__(self, config):
    """
    Read the Analytics section of `config`, set the log level and connect
    to the files, configuration and user-monitoring couch databases, all
    using the ops proxy as both key and certificate.
    """
    BaseWorkerThread.__init__(self)
    self.config = config.Analytics

    try:
        self.logger.setLevel(self.config.log_level)
    except AttributeError:
        # No usable self.logger: fall back to the root logger.
        import logging
        self.logger = logging.getLogger()
        self.logger.setLevel(self.config.log_level)
    self.logger.debug('Configuration loaded')

    server = CouchServer(dburl=self.config.couch_instance,
                         ckey=self.config.opsProxy,
                         cert=self.config.opsProxy)
    self.db = server.connectDatabase(self.config.files_database)
    self.logger.debug('Connected to local couchDB')

    config_server = CouchServer(dburl=self.config.config_couch_instance,
                                ckey=self.config.opsProxy,
                                cert=self.config.opsProxy)
    self.config_db = config_server.connectDatabase(self.config.config_database)
    self.amq_auth_file = self.config.amq_auth_file

    monitoring_server = CouchServer(dburl=self.config.couch_user_monitoring_instance,
                                    ckey=self.config.opsProxy,
                                    cert=self.config.opsProxy)
    self.monitoring_db = monitoring_server.connectDatabase(self.config.user_monitoring_db)
    self.logger.debug('Connected to user_monitoring_db in couchDB')
def setUp(self):
    """
    _setUp_

    Set up logging, a clean SQL database, the "acdc_event_based_t" couch
    database with the GroupUser and ACDC couchapps, and the WMBS schema;
    then populate WMBS and define the default performance parameters.
    """
    testInit = TestInitCouchApp(__file__)
    self.testInit = testInit
    testInit.setLogging()
    testInit.setDatabaseConnection(destroyAllDatabase=True)

    self.couchUrl = os.environ["COUCHURL"]
    self.couchDBName = "acdc_event_based_t"
    testInit.setupCouch(self.couchDBName, "GroupUser", "ACDC")
    testInit.setSchema(customModules=["WMCore.WMBS"], useDefault=False)

    couchServer = CouchServer(dburl=self.couchUrl)
    self.couchDB = couchServer.connectDatabase(self.couchDBName)

    self.populateWMBS()
    self.performanceParams = {
        'timePerEvent': 12,
        'memoryRequirement': 2300,
        'sizePerEvent': 400,
    }
    self.eventsPerJob = 100
    return
class Registration():
    """
    Registers a component in the registration couch database and lets it
    "ping" that database afterwards. Registration is best-effort: failures
    are logged, never raised.
    """

    def __init__(self, cfg_dict=None, reg_info=None):
        """
        Initialise the regsvc for this component.

        cfg_dict -- overrides for the defaults 'server', 'database' and
                    'cacheduration'
        reg_info -- registration document; must contain 'location'. The
                    dict passed in is updated in place with '_id',
                    '#config_hash' and, for an existing doc, '_rev'.
        """
        # None defaults instead of shared mutable {} defaults
        if cfg_dict is None:
            cfg_dict = {}
        if reg_info is None:
            reg_info = {}

        # Defined up front so report() can tell whether set-up succeeded
        self.server = None
        self.db = None
        self.location_hash = None

        try:
            config_dict = {
                'server': 'https://cmsweb.cern.ch/',
                'database': 'registration',
                'cacheduration': 1,
            }
            config_dict.update(cfg_dict)

            self.server = CouchServer(config_dict['server'])
            self.db = self.server.connectDatabase(config_dict['database'])

            if 'location' not in reg_info:
                raise KeyError('Registration needs a location in its reg_info')
            self.location_hash = str(reg_info['location'].__hash__())
            reg_info['_id'] = self.location_hash
            reg_info['#config_hash'] = hash(str(reg_info))

            push_cfg = True
            if self.db.documentExists(self.location_hash):
                # If the doc exists, check that the configuration hasn't changed
                doc = self.db.document(self.location_hash)
                push_cfg = doc['#config_hash'] != reg_info['#config_hash']
                reg_info['_rev'] = doc['_rev']
            if push_cfg:
                self.db.commitOne(reg_info)
        except Exception:
            # Don't want to raise anything here, but leave a trace
            import logging
            logging.getLogger(__name__).warning(
                "Registration set-up failed", exc_info=True)
        self.report()

    def report(self):
        """
        'Ping' the RegSvc with a doc containing the service doc's ID and a
        timestamp, this can be used to provide uptime information.

        Does nothing when the constructor could not connect or could not
        determine the location hash.
        """
        if self.db is None or self.location_hash is None:
            return
        try:
            self.db.commitOne({'service': self.location_hash}, timestamp=True)
        except Exception:
            # Don't want to raise anything here, but leave a trace
            import logging
            logging.getLogger(__name__).warning(
                "Registration report failed", exc_info=True)
def installCouchApp(couchUrl, couchDBName, couchAppName, basePath=None):
    """
    _installCouchApp_

    Push the couch app found at <basePath>/<couchAppName> to the database
    <couchUrl>/<couchDBName>. When basePath is not given, couchAppRoot()
    supplies it.
    """
    if not basePath:
        basePath = couchAppRoot()

    print("Installing %s into %s" % (couchAppName, couchDBName))

    # NOTE(review): the server object is created but not used afterwards;
    # kept in case construction is relied upon - confirm.
    couchServer = CouchServer(couchUrl)

    appSource = "%s/%s" % (basePath, couchAppName)
    appTarget = "%s/%s" % (couchUrl, couchDBName)
    couchapppush(Config(), appSource, appTarget)
    return
def __init__(self, config, couchDbName=None):
    """
    Set up the couch connections and the dashboard reporter.

    config      -- configuration object; its JobStateMachine section is
                   read for the couch URL and database name
    couchDbName -- database name overriding JobStateMachine.couchDBName

    An error while creating the dashboard reporter is logged and
    re-raised.
    """
    WMObject.__init__(self, config)
    WMConnectionBase.__init__(self, "WMCore.WMBS")

    # Identity test: only a missing name (None) selects the configured one
    if couchDbName is None:
        self.dbname = getattr(self.config.JobStateMachine, "couchDBName")
    else:
        self.dbname = couchDbName

    self.couchdb = CouchServer(self.config.JobStateMachine.couchurl)
    self._connectDatabases()

    try:
        self.dashboardReporter = DashboardReporter(config)
    except Exception as ex:
        # "as" form works on Python 2.6+ and 3; the message no longer
        # carries the stray continuation characters.
        logging.error("Error setting up the dashboard reporter: %s" % str(ex))
        raise
class WMLoggingTest(unittest.TestCase):
    """
    Checks that CouchHandler writes log records into a couch database.
    """

    def setUp(self):
        """Create a fresh database named after the running test."""
        # Make an instance of the server
        self.server = CouchServer(
            os.getenv("COUCHURL", 'http://*****:*****@localhost:5984'))
        testname = self.id().split('.')[-1]
        # Create a database, drop an existing one first
        self.dbname = 'cmscouch_unittest_%s' % testname.lower()

        if self.dbname in self.server.listDatabases():
            self.server.deleteDatabase(self.dbname)

        self.server.createDatabase(self.dbname)
        self.db = self.server.connectDatabase(self.dbname)

    def tearDown(self):
        """Drop the database, but only when the test has passed."""
        # NOTE(review): _exc_info is a private unittest helper; confirm it
        # exists in the unittest version this suite runs under.
        if self._exc_info()[0] is None:
            # This test has passed, clean up after it
            self.server.deleteDatabase(self.dbname)

    def testLog(self):
        """
        Write ten log messages to the database at three different levels
        """
        my_logger = logging.getLogger('MyLogger')
        my_logger.setLevel(logging.DEBUG)
        handler = CouchHandler(self.server.url, self.dbname)
        formatter = logging.Formatter('%(message)s')
        handler.setFormatter(formatter)
        my_logger.addHandler(handler)

        try:
            for _ in range(10):
                my_logger.debug('This is probably all noise.')
                my_logger.info('Jackdaws love my big sphinx of quartz.')
                my_logger.error('HOLLY CRAP!')
        finally:
            # 'MyLogger' is process-wide; detach the handler so it does not
            # keep writing to this database after the test.
            my_logger.removeHandler(handler)

        logs = self.db.allDocs()['rows']
        self.assertEqual(30, len(logs))
class Source:
    """
    Plugins parent class.
    """

    def __init__(self, config, logger):
        """
        Initialise class members.

        Connects to the files database, derives self.since from the
        newest key of the AsyncTransfer 'lastPollTime' view (0 when that
        lookup fails) and creates the PhEDEx API object. A PhEDEx failure
        is logged but not raised.
        """
        self.config = config
        self.logger = logger
        self.asyncServer = CouchServer(self.config.couch_instance)
        self.db = self.asyncServer.connectDatabase(self.config.files_database)

        try:
            query = {'limit': 1, 'descending': True}
            last_pollTime = self.db.loadView('AsyncTransfer', 'lastPollTime',
                                             query)['rows'][0]['key']
            self.since = last_pollTime + 1
        except Exception:
            # No poll time could be read: start from the beginning.
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            self.since = 0

        try:
            self.phedexApi = PhEDEx(secure=True, dict={})
        except Exception as e:
            # NOTE(review): self.phedexApi stays undefined on this path.
            self.logger.exception('PhEDEx object exception: %s' % e)
def testRemoveByCollectionName(self):
    """
    _testRemoveByCollectionName_

    For each populated collection, check that its filesets are visible in
    the 'byCollectionName' view, remove them through the service and
    check that the view is empty afterwards.
    """
    self.populateCouchDB()
    svc = CouchService(url=self.testInit.couchUrl,
                       database=self.testInit.couchDbName)
    server = CouchServer(self.testInit.couchUrl)
    database = server.connectDatabase(self.testInit.couchDbName)

    for collectionName in ("Thunderstruck", "Struckthunder"):
        before = database.loadView("ACDC", "byCollectionName",
                                   keys=[collectionName])
        self.assertTrue(len(before["rows"]) > 0)

        svc.removeFilesetsByCollectionName(collectionName)

        after = database.loadView("ACDC", "byCollectionName",
                                  keys=[collectionName])
        self.assertEqual(len(after["rows"]), 0)
    return
class ConfigCache(WMObject):
    """
    _ConfigCache_

    The class that handles the upload and download of configCache
    artifacts from Couch
    """

    def __init__(self, dbURL, couchDBName=None, id=None, rev=None, usePYCurl=False,
                 ckey=None, cert=None, capath=None, detail=True):
        """
        Connect to the couch server at dbURL and to the couchDBName
        database, creating the database when the server does not list it,
        then set up an empty "config" document.

        id  -- optional couch document ID stored as the document's '_id'
        rev -- accepted but not used by this constructor

        Raises ConfigCacheException when the couch connection fails.
        """
        self.dbname = couchDBName
        self.dburl = dbURL
        self.detail = detail
        try:
            self.couchdb = CouchServer(self.dburl, usePYCurl=usePYCurl,
                                       ckey=ckey, cert=cert, capath=capath)
            if self.dbname not in self.couchdb.listDatabases():
                self.createDatabase()
            self.database = self.couchdb.connectDatabase(self.dbname)
        except Exception as ex:
            msg = "Error connecting to couch: %s\n" % str(ex)
            msg += str(traceback.format_exc())
            logging.error(msg)
            raise ConfigCacheException(message=msg)

        # local cache
        self.docs_cache = DocumentCache(self.database, self.detail)

        # UserGroup variables
        self.group = None
        self.owner = None

        # Internal data structure
        self.document = Document()
        self.attachments = {}
        self.document['type'] = "config"
        self.document['description'] = {}
        self.document['description']['config_label'] = None
        self.document['description']['config_desc'] = None

        if id is not None:
            self.document['_id'] = id
        self.document['pset_tweak_details'] = None
        self.document['info'] = None
        self.document['config'] = None
        return

    def createDatabase(self):
        """
        _createDatabase_

        Create the database self.dbname on the server, commit it and
        return the database object.
        """
        database = self.couchdb.createDatabase(self.dbname)
        database.commit()
        return database

    def connectUserGroup(self, groupname, username):
        """
        _connectUserGroup_

        Attach to the given group and build the owner object for the
        given user in that group.
        """
        self.group = Group(name=groupname)
        self.group.setCouch(self.dburl, self.dbname)
        self.group.connect()
        self.owner = makeUser(groupname, username,
                              couchUrl=self.dburl,
                              couchDatabase=self.dbname)
        return

    def createUserGroup(self, groupname, username):
        """
        _createUserGroup_

        Create all the userGroup information
        """
        self.createGroup(name=groupname)
        self.createUser(username=username)
        return

    def createGroup(self, name):
        """
        _createGroup_

        Create Group for GroupUser
        """
        self.group = Group(name=name)
        self.group.setCouch(self.dburl, self.dbname)
        self.group.connect()
        self.group.create()
        return

    def setLabel(self, label):
        """
        _setLabel_

        Util to add a descriptive label to the configuration doc
        """
        self.document['description']['config_label'] = label

    def setDescription(self, desc):
        """
        _setDescription_

        Util to add a verbose description string to a configuration doc
        """
        self.document['description']['config_desc'] = desc

    @Decorators.requireGroup
    def createUser(self, username):
        """
        Create the user in the current group and make it the owner of the
        internal document.
        """
        self.owner = makeUser(self.group['name'], username,
                              couchUrl=self.dburl,
                              couchDatabase=self.dbname)
        self.owner.create()
        self.owner.ownThis(self.document)
        return

    @Decorators.requireGroup
    @Decorators.requireUser
    def save(self):
        """
        _save_

        Save yourself!  Save your internal document, then every
        attachment.

        Raises ConfigCacheException when couch returns no usable result
        for the commit.
        """
        rawResults = self.database.commit(doc=self.document)

        # We should only be committing one document at a time
        # if not, get the last one.
        try:
            commitResults = rawResults[-1]
            self.document["_rev"] = commitResults.get('rev')
            self.document["_id"] = commitResults.get('id')
        except (KeyError, IndexError) as ex:
            # IndexError covers an empty result list
            msg = "Document returned from couch without ID or Revision\n"
            msg += "Document probably bad\n"
            msg += str(ex)
            logging.error(msg)
            raise ConfigCacheException(message=msg)

        # Now do the attachments
        for attachName in self.attachments:
            self.saveAttachment(name=attachName,
                                attachment=self.attachments[attachName])
        return

    def saveAttachment(self, name, attachment):
        """
        _saveAttachment_

        Save an attachment to the document and record the new document
        revision.

        Raises ConfigCacheException when couch does not report 'ok'.
        """
        retval = self.database.addAttachment(self.document["_id"],
                                             self.document["_rev"],
                                             attachment,
                                             name)

        if retval.get('ok', False) != True:
            # Then we have a problem
            msg = "Adding an attachment to document failed\n"
            msg += str(retval)
            msg += "ID: %s, Rev: %s" % (self.document["_id"], self.document["_rev"])
            logging.error(msg)
            raise ConfigCacheException(msg)

        self.document["_rev"] = retval['rev']
        self.document["_id"] = retval['id']
        self.attachments[name] = attachment
        return

    def loadDocument(self, configID):
        """
        _loadDocument_

        Load a document from the document cache given its couchID
        """
        self.document = self.docs_cache[configID]

    def loadByID(self, configID):
        """
        _loadByID_

        Load a document from the server given its couchID, including its
        owner information and attachments when present.

        Raises ConfigCacheException on any failure.
        """
        try:
            self.document = self.database.document(id=configID)
            if 'owner' in self.document:
                self.connectUserGroup(
                    groupname=self.document['owner'].get('group', None),
                    username=self.document['owner'].get('user', None))
            if '_attachments' in self.document:
                # Then we need to load the attachments
                for key in self.document['_attachments'].keys():
                    self.loadAttachment(name=key)
        except CouchNotFoundError as ex:
            msg = "Document with id %s not found in couch\n" % (configID)
            msg += str(ex)
            msg += str(traceback.format_exc())
            logging.error(msg)
            raise ConfigCacheException(message=msg)
        except Exception as ex:
            msg = "Error loading document from couch\n"
            msg += str(ex)
            msg += str(traceback.format_exc())
            logging.error(msg)
            raise ConfigCacheException(message=msg)
        return

    def loadAttachment(self, name, overwrite=True):
        """
        _loadAttachment_

        Load an attachment from the database and store it in
        self.attachments. With overwrite=False an attachment that is
        already held is left alone.
        """
        # NOTE: the attachment is fetched before the overwrite check
        attach = self.database.getAttachment(self.document["_id"], name)

        if not overwrite:
            if name in self.attachments:
                logging.info("Attachment already exists, so we're skipping")
                return

        self.attachments[name] = attach
        return

    def loadByView(self, view, value):
        """
        _loadByView_

        Underlying code to load views: look `value` up in the given
        ConfigCache view and load the first matching document by ID.

        Raises ConfigCacheException when the view returns no rows
        (previously this surfaced as an IndexError).
        """
        viewRes = self.database.loadView('ConfigCache', view, {}, [value])

        if len(viewRes['rows']) == 0:
            # Then we have a problem
            msg = "Unable to load using view %s and value %s" % (view, str(value))
            logging.error(msg)
            raise ConfigCacheException(message=msg)

        self.unwrapView(viewRes)
        self.loadByID(self.document["_id"])
        return

    def saveConfigToDisk(self, targetFile):
        """
        _saveConfigToDisk_

        Write the currently held config to targetFile. Nothing is written
        when there is no config.
        """
        config = self.getConfig()
        if not config:
            return

        # The context manager closes the file even if the write fails
        with open(targetFile, 'w') as handle:
            handle.write(config)
        return

    def load(self):
        """
        _load_

        Figure out how to load: by ID when the document has one,
        otherwise by md5 hash when that is set.
        """
        if self.document.get("_id", None) is not None:
            # Then we should load by ID
            self.loadByID(self.document["_id"])
            return

        # Otherwise we have to load by view
        if self.document.get('md5_hash', None) is not None:
            # Then we have an md5_hash
            self.loadByView(view='config_by_md5hash', value=self.document['md5_hash'])
        # TODO: Add more views as they become available.

    def unwrapView(self, view):
        """
        _unwrapView_

        Move view information into the main document
        """
        self.document["_id"] = view['rows'][0].get('id')
        self.document["_rev"] = view['rows'][0].get('value').get('_rev')

    def setPSetTweaks(self, PSetTweak):
        """
        _setPSetTweaks_

        Set the PSet tweak details for the config.
        """
        self.document['pset_tweak_details'] = PSetTweak
        return

    def getPSetTweaks(self):
        """
        _getPSetTweaks_

        Retrieve the PSet tweak details.
        """
        return self.document['pset_tweak_details']

    def getOutputModuleInfo(self):
        """
        _getOutputModuleInfo_

        Retrieve the dataset information for the config in the
        ConfigCache: a dict keyed by output module name whose values hold
        'dataTier' and 'filterName' (both None without dataset info).

        Raises ConfigCacheException when the PSet tweaks lack the
        expected fields.
        """
        psetTweaks = self.getPSetTweaks()
        if 'process' not in psetTweaks:
            raise ConfigCacheException(
                "Could not find process field in PSet while getting output modules!"
            )
        try:
            outputModuleNames = psetTweaks["process"]["outputModules_"]
        except KeyError as ex:
            msg = "Could not find outputModules_ in psetTweaks['process'] while getting output modules.\n"
            msg += str(ex)
            logging.error(msg)
            raise ConfigCacheException(msg)

        results = {}
        for outputModuleName in outputModuleNames:
            try:
                outModule = psetTweaks["process"][outputModuleName]
            except KeyError:
                msg = "Could not find outputModule %s in psetTweaks['process']" % outputModuleName
                logging.error(msg)
                raise ConfigCacheException(msg)
            dataset = outModule.get("dataset", None)
            if dataset:
                results[outputModuleName] = {
                    "dataTier": outModule["dataset"]["dataTier"],
                    "filterName": outModule["dataset"]["filterName"]
                }
            else:
                results[outputModuleName] = {
                    "dataTier": None,
                    "filterName": None
                }

        return results

    def addConfig(self, newConfig, psetHash=None):
        """
        _addConfig_

        Read the configuration from newConfig, record its md5 hash and
        the given PSet hash, and hold it as the 'configFile' attachment.
        """
        # The newConfig parameter is a URL suitable for passing to urlopen.
        # NOTE(review): urllib.urlopen is the Python 2 location of urlopen.
        configString = urllib.urlopen(newConfig).read(-1)
        configMD5 = hashlib.md5(configString).hexdigest()

        self.document['md5_hash'] = configMD5
        self.document['pset_hash'] = psetHash
        self.attachments['configFile'] = configString
        return

    def getConfig(self):
        """
        _getConfig_

        Get the currently active config
        """
        return self.attachments.get('configFile', None)

    def getCouchID(self):
        """
        _getCouchID_

        Return the document's couchID
        """
        return self.document["_id"]

    def getCouchRev(self):
        """
        _getCouchRev_

        Return the document's couchRevision
        """
        return self.document["_rev"]

    @Decorators.requireGroup
    @Decorators.requireUser
    def delete(self):
        """
        _delete_

        Deletes the document with the current docid

        Raises ConfigCacheException when the delete fails.
        """
        if not self.document["_id"]:
            # NOTE(review): only logged; the delete is still attempted.
            logging.error("Attempted to delete with no couch ID")

        # TODO: Delete without loading first
        try:
            self.database.queueDelete(self.document)
            self.database.commit()
        except Exception as ex:
            msg = "Error in deleting document from couch"
            msg += str(ex)
            msg += str(traceback.format_exc())
            logging.error(msg)
            raise ConfigCacheException(message=msg)
        return

    def getIDFromLabel(self, label):
        """
        _getIDFromLabel_

        Retrieve the ID of a config given its label. Returns None when no
        config carries exactly that label, including when the view
        returns no rows at all.
        """
        results = self.database.loadView("ConfigCache", "config_by_label", {
            "startkey": label,
            "limit": 1
        })

        rows = results["rows"]
        if rows and rows[0]["key"] == label:
            return rows[0]["value"]
        return None

    def listAllConfigsByLabel(self):
        """
        _listAllConfigsByLabel_

        Retrieve a list of all the configs in the config cache.  This is
        returned in the form of a dictionary that is keyed by label.
        """
        configs = {}
        results = self.database.loadView("ConfigCache", "config_by_label")

        for result in results["rows"]:
            configs[result["key"]] = result["value"]
        return configs

    def __str__(self):
        """
        Make something printable
        """
        return self.document.__str__()

    def validate(self, configID):
        """
        Load the config with the given ID and, when self.detail is set,
        check its output modules: each needs a dataTier and no
        dataTier/filterName pair may repeat.

        Returns the output module info when self.detail is set, otherwise
        True. Raises ConfigCacheException on any failure.
        """
        try:
            #TODO: need to change to DataCache
            #self.loadDocument(configID = configID)
            self.loadByID(configID=configID)
        except Exception as ex:
            raise ConfigCacheException(
                "Failure to load ConfigCache while validating workload: %s" % str(ex))

        if not self.detail:
            return True

        duplicateCheck = {}
        try:
            outputModuleInfo = self.getOutputModuleInfo()
        except Exception as ex:
            # Something's gone wrong with trying to open the configCache
            msg = "Error in getting output modules from ConfigCache during workload validation.  Check ConfigCache formatting!"
            raise ConfigCacheException("%s: %s" % (msg, str(ex)))

        for outputModule in outputModuleInfo.values():
            dataTier = outputModule.get('dataTier', None)
            filterName = outputModule.get('filterName', None)
            if not dataTier:
                raise ConfigCacheException("No DataTier in output module.")

            # Filter names already seen for this data tier
            seenFilters = duplicateCheck.setdefault(dataTier, [])
            if filterName in seenFilters:
                # Then we've seen this combination before
                raise ConfigCacheException(
                    "Duplicate dataTier/filterName combination.")
            seenFilters.append(filterName)
        return outputModuleInfo
class WMStatsReader(object):
    """
    Reader for the WMStats couch database.

    Wraps a couch database connection (self.couchDB) and, when a request
    database URL is supplied, a RequestDBReader (self.reqDB) used by the
    request-oriented getters.
    """

    # TODO need to get this from reqmgr api
    ACTIVE_STATUS = ["new",
                     "assignment-approved",
                     "assigned",
                     "acquired",
                     "running",
                     "running-open",
                     "running-closed",
                     "failed",
                     "force-complete",
                     "completed",
                     "closed-out",
                     "announced",
                     "aborted",
                     "aborted-completed",
                     "rejected"]

    T0_ACTIVE_STATUS = ["new",
                        "Closed",
                        "Merge",
                        "Harvesting",
                        "Processing Done",
                        "AlcaSkim",
                        "completed"]

    def __init__(self, couchURL, appName="WMStats", reqdbURL=None, reqdbCouchApp="ReqMgr"):
        """
        Connect to the wmstats database at couchURL and, if reqdbURL is
        given, create the request database reader as well.
        """
        # the value returned by _sanitizeURL is not kept here
        self._sanitizeURL(couchURL)
        # set the connection for local couchDB call
        self._commonInit(couchURL, appName)
        self.reqDB = RequestDBReader(reqdbURL, reqdbCouchApp) if reqdbURL else None

    def _sanitizeURL(self, couchURL):
        """
        Return the 'url' entry of sanitizeURL(couchURL).
        """
        sanitized = sanitizeURL(couchURL)
        return sanitized['url']

    def _commonInit(self, couchURL, appName="WMStats"):
        """
        Set up the common variables for inherited classes.
        Inherited classes should call this in their init function.
        """
        serverURL, databaseName = splitCouchServiceURL(couchURL)
        self.couchURL = serverURL
        self.dbName = databaseName
        self.couchServer = CouchServer(self.couchURL)
        self.couchDB = self.couchServer.connectDatabase(self.dbName, False)
        self.couchapp = appName
        self.defaultStale = {"stale": "update_after"}

    def setDefaultStaleOptions(self, options):
        """
        Return options with the default 'stale' setting added when the
        caller did not provide one. A non-empty options dict is updated
        in place; an empty or None value is replaced by a new dict.
        """
        staleOptions = options if options else {}
        if 'stale' not in staleOptions:
            staleOptions.update(self.defaultStale)
        return staleOptions

    def getLatestJobInfoByRequests(self, requestNames):
        """
        Return the latest job information (view result) for the given
        request names, or an empty dict when no names are given.
        """
        if len(requestNames) == 0:
            return {}
        agentKeys = self._getRequestAndAgent(requestNames)
        return self._getLatestJobInfo(agentKeys)

    def _updateRequestInfoWithJobInfo(self, requestInfo):
        """
        Add the agent job information to every request in requestInfo
        (in place). Nothing is done for an empty requestInfo.
        """
        requestNames = requestInfo.keys()
        if len(requestNames) == 0:
            return
        jobData = self.getLatestJobInfoByRequests(requestInfo.keys())
        self._combineRequestAndJobData(requestInfo, jobData)

    def _getCouchView(self, view, options, keys=None):
        """
        Load the given view of the couchapp with the default stale option
        applied. A single string key is wrapped into a list.
        """
        options = self.setDefaultStaleOptions(options)
        if not keys:
            keys = []
        elif isinstance(keys, str):
            keys = [keys]
        return self.couchDB.loadView(self.couchapp, view, options, keys)

    def _formatCouchData(self, data, key="id"):
        """
        Convert the rows of a couch result to {row[key]: row["doc"]}.
        Rows containing 'error' are dropped; rows without "doc" map to None.
        """
        formatted = {}
        for entry in data['rows']:
            if 'error' not in entry:
                formatted[entry[key]] = entry.get("doc")
        return formatted

    def _combineRequestAndJobData(self, requestData, jobData):
        """
        update the request data with job info, keyed by agent url:

        requestData[workflow]['AgentJobInfo'] =
            {'vocms234.cern.ch:9999': <job info document>, ...}

        where the job info document is the "doc" of a jobData row, e.g.
            {"_id": "d1d11dfcb30e0ab47db42007cb6fb847",
             "workflow": "amaltaro_OracleUpgrade_TEST_HG1401_140220_090116_6731",
             "agent_url": "vocms231.cern.ch:9999",
             "agent_team": "testbed-integration",
             "status": {"inWMBS": 334, "success": 381, ...},
             "sites": {...},
             "tasks": {...},
             "type": "agent_request"}
        """
        if not jobData:
            return
        for entry in jobData["rows"]:
            doc = entry["doc"]
            # condition checks if documents are deleted between calls.
            # just ignore in that case
            if not doc:
                continue
            agentJobInfo = requestData[doc["workflow"]].setdefault("AgentJobInfo", {})
            agentJobInfo[doc["agent_url"]] = doc

    def _getRequestAndAgent(self, filterRequest=None):
        """
        returns the [['request_name', 'agent_url'], ....]
        restricted to the requests in filterRequest when it is given
        """
        viewResult = self._getCouchView("requestAgentUrl", {"reduce": True, "group": True})
        rows = viewResult["rows"]
        if filterRequest is None:
            return [entry['key'] for entry in rows]
        return [entry['key'] for entry in rows if entry['key'][0] in filterRequest]

    def _getLatestJobInfo(self, keys):
        """
        keys is [['request_name', 'agent_url'], ....]
        returns the "latestRequest" view result including the documents,
        or an empty list when no keys are given
        """
        if len(keys) == 0:
            return []
        viewOptions = {"include_docs": True, "reduce": False}
        return self._getCouchView("latestRequest", viewOptions, keys)

    def _getAllDocsByIDs(self, ids, include_docs=True):
        """
        ids is [id, ....]
        returns the allDocs result, or None when no ids are given
        """
        if len(ids) == 0:
            return None
        return self.couchDB.allDocs({"include_docs": include_docs}, ids)

    def _getAgentInfo(self):
        """
        returns all the agents status on wmstats
        """
        return self._getCouchView("agentInfo", {})

    def agentsByTeam(self, filterDrain=False):
        """
        return a dictionary like {team:#agents,...}
        With filterDrain set, agents in drain mode are not counted
        (their team still appears in the result).
        """
        teamCount = dict()
        for entry in self._getAgentInfo()["rows"]:
            agentValue = entry['value']
            team = agentValue['agent_team']
            # filtering empty string
            if not team:
                continue
            teamCount.setdefault(team, 0)
            draining = filterDrain and agentValue.get('drain_mode', False)
            if not draining:
                teamCount[team] += 1
        return teamCount

    def getServerInstance(self):
        """
        Return the couch server object.
        """
        return self.couchServer

    def getDBInstance(self):
        """
        Return the couch database object.
        """
        return self.couchDB

    def getRequestDBInstance(self):
        """
        Return the request database reader (None when not configured).
        """
        return self.reqDB

    def getHeartbeat(self):
        """
        Return the couch database info, or {'error_message': ...} when the
        call fails.
        """
        try:
            heartbeat = self.couchDB.info()
        except Exception as ex:
            heartbeat = {'error_message': str(ex)}
        return heartbeat

    def getRequestByNames(self, requestNames, jobInfoFlag=False):
        """
        To use this function reqDBURL needs to be set when wmstats is
        initialized. This will be deprecated, so please don't use it.
        """
        requests = self.reqDB.getRequestByNames(requestNames, True)
        if jobInfoFlag:
            # get request and agent info
            self._updateRequestInfoWithJobInfo(requests)
        return requests

    def getActiveData(self, jobInfoFlag=False):
        """
        Return the requests in one of the ACTIVE_STATUS states.
        """
        return self.getRequestByStatus(WMStatsReader.ACTIVE_STATUS, jobInfoFlag)

    def getT0ActiveData(self, jobInfoFlag=False):
        """
        Return the requests in one of the T0_ACTIVE_STATUS states.
        """
        return self.getRequestByStatus(WMStatsReader.T0_ACTIVE_STATUS, jobInfoFlag)

    def getRequestByStatus(self, statusList, jobInfoFlag=False, limit=None, skip=None,
                           legacyFormat=False):
        """
        To use this function reqDBURL needs to be set when wmstats is
        initialized. This will be deprecated, so please don't use it.
        If legacyFormat is True convert data to old wmstats format from
        current reqmgr format.
        Shouldn't be set to True unless existing code breaks
        """
        requests = self.reqDB.getRequestByStatus(statusList, True, limit, skip)
        if legacyFormat:
            # convert the format to wmstats old format
            for name in requests:
                requests[name] = convertToLegacyFormat(requests[name])
        if jobInfoFlag:
            # get request and agent info
            self._updateRequestInfoWithJobInfo(requests)
        return requests

    def getRequestSummaryWithJobInfo(self, requestName):
        """
        get request info with job status
        """
        summary = self.reqDB.getRequestByNames(requestName)
        self._updateRequestInfoWithJobInfo(summary)
        return summary

    def getArchivedRequests(self):
        """
        get list of archived workflow in wmstats db.
        """
        viewOptions = {"group_level": 1, "reduce": True}
        rows = self.couchDB.loadView(self.couchapp, "allWorkflows", options=viewOptions)['rows']
        statusByRequest = self.reqDB.getStatusAndTypeByRequest([entry['key'] for entry in rows])
        return [name for name, value in statusByRequest.items()
                if value[0].endswith("-archived")]

    def isWorkflowCompletedWithLogCollectAndCleanUp(self, requestName):
        """
        check whether workflow is completed including LogCollect and CleanUp tasks
        TODO: If the parent task all failed and next task are not created at all,
            It can't detect complete status.
            If the one of the task doesn't contain any jobs, it will return False
        """
        summary = self.getRequestSummaryWithJobInfo(requestName)
        return RequestInfo(summary[requestName]).isWorkflowFinished()

    def getTaskJobSummaryByRequest(self, requestName, sampleSize=1):
        """
        Return the nested job details of the request, built from the
        "jobsByStatusWorkflow" view with up to sampleSize sample documents
        per grouped key.
        """
        viewOptions = {'reduce': True, 'group_level': 5,
                       'startkey': [requestName], 'endkey': [requestName, {}]}
        grouped = self.couchDB.loadView(self.couchapp, "jobsByStatusWorkflow", options=viewOptions)

        summary = {}
        for entry in grouped['rows']:
            # entry["key"] = ['workflow', 'task', 'jobstatus', 'exitCode', 'site']
            rangeStart = entry["key"][:4]
            site = entry["key"][4]
            if site:
                rangeStart.append(site)
            rangeEnd = list(rangeStart) + [{}]
            details = self.jobDetailByTasks(rangeStart, rangeEnd, entry["value"], sampleSize)
            summary = nestedDictUpdate(summary, details)
        return summary

    def jobDetailByTasks(self, startKey, endKey, numOfError, limit=1):
        """
        Return {workflow: {task: {jobStatus: {exitCode: {site:
        {"errorCount": numOfError, "samples": [docs]}}}}}} for the rows of
        the "jobsByStatusWorkflow" view between startKey and endKey.
        """
        viewOptions = {'include_docs': True, 'reduce': False,
                       'startkey': startKey, 'endkey': endKey, 'limit': limit}
        viewResult = self.couchDB.loadView(self.couchapp, "jobsByStatusWorkflow", options=viewOptions)

        details = {}
        for entry in viewResult['rows']:
            rowKey = entry['key']
            workflow, task, jobStatus = rowKey[0], rowKey[1], rowKey[2]
            exitCode, site = rowKey[3], rowKey[4]
            leaf = (details.setdefault(workflow, {})
                    .setdefault(task, {})
                    .setdefault(jobStatus, {})
                    .setdefault(exitCode, {})
                    .setdefault(site, {}))
            leaf["errorCount"] = numOfError
            leaf.setdefault("samples", []).append(entry["doc"])
        return details

    def getAllAgentRequestRevByID(self, agentURL):
        """
        Return {document id: revision} for the rows of the "byAgentURL"
        view belonging to agentURL.
        """
        viewResult = self.couchDB.loadView(self.couchapp, "byAgentURL",
                                           options={"reduce": False}, keys=[agentURL])
        return dict((entry['id'], entry['value']['rev']) for entry in viewResult['rows'])
class WorkQueueBackend(object):
    """
    Represents persistent storage for WorkQueue

    Holds two couch databases on the same server: the queue database
    (self.db) and its inbox (self.inbox), plus the URL of an optional
    parent queue used for replication.
    """

    def __init__(self, db_url, db_name='workqueue', inbox_name=None, parentQueue=None,
                 queueUrl=None, logger=None):
        """
        Connect to the queue and inbox databases.

        :param db_url: URL of the couch server (kept as hostWithAuth)
        :param db_name: name of the queue database
        :param inbox_name: name of the inbox database, "<db_name>_inbox" by default
        :param parentQueue: URL of the parent queue, if any
        :param queueUrl: URL of this queue, "<db_url>/<db_name>" by default
        :param logger: logger object; the logging module is used when omitted
        """
        if logger:
            self.logger = logger
        else:
            import logging
            self.logger = logging

        if inbox_name is None:
            inbox_name = "%s_inbox" % db_name

        self.server = CouchServer(db_url)
        self.parentCouchUrlWithAuth = parentQueue
        if parentQueue:
            self.parentCouchUrl = sanitizeURL(parentQueue)['url']
        else:
            self.parentCouchUrl = None
        self.db = self.server.connectDatabase(db_name, create=False, size=10000)
        self.hostWithAuth = db_url
        self.inbox = self.server.connectDatabase(inbox_name, create=False, size=10000)
        self.queueUrl = sanitizeURL(queueUrl or (db_url + '/' + db_name))['url']
        # key under which the element parameters are passed to the bulk update calls
        self.eleKey = 'WMCore.WorkQueue.DataStructs.WorkQueueElement.WorkQueueElement'

    def forceQueueSync(self):
        """Force a blocking replication - used only in tests"""
        self.pullFromParent(continuous=False)
        self.sendToParent(continuous=False)

    def pullFromParent(self, continuous=True, cancel=False):
        """
        Replicate from parent couch into the inbox - blocking: used only in tests.

        Does nothing unless both the parent URL and the queue URL are set.
        Replication failures are logged as warnings and not raised.
        """
        try:
            if self.parentCouchUrl and self.queueUrl:
                self.server.replicate(source=self.parentCouchUrl,
                                      destination="%s/%s" % (self.hostWithAuth, self.inbox.name),
                                      filter='WorkQueue/queueFilter',
                                      query_params={'childUrl': self.queueUrl,
                                                    'parentUrl': self.parentCouchUrl},
                                      continuous=continuous,
                                      cancel=cancel)
        except Exception as ex:
            self.logger.warning('Replication from %s failed: %s' % (self.parentCouchUrl, str(ex)))

    def sendToParent(self, continuous=True, cancel=False):
        """
        Replicate the inbox to the parent couch - blocking: used only in tests.

        Does nothing unless both the parent URL and the queue URL are set.
        Replication failures are logged as warnings and not raised.
        """
        try:
            if self.parentCouchUrl and self.queueUrl:
                self.server.replicate(source="%s" % self.inbox.name,
                                      destination=self.parentCouchUrlWithAuth,
                                      filter='WorkQueue/queueFilter',
                                      query_params={'childUrl': self.queueUrl,
                                                    'parentUrl': self.parentCouchUrl},
                                      continuous=continuous,
                                      cancel=cancel)
        except Exception as ex:
            self.logger.warning('Replication to %s failed: %s' % (self.parentCouchUrl, str(ex)))

    def getElementsForSplitting(self):
        """
        Returns the inbox elements in status 'Negotiating', i.e. the ones
        that need to be split, each with its spec loaded from the parent
        queue and attached as 'WMSpec'.
        """
        elements = self.getInboxElements(status='Negotiating')
        specs = {}  # cache as may have multiple elements for same spec
        for ele in elements:
            if ele['RequestName'] not in specs:
                wmspec = WMWorkloadHelper()
                wmspec.load(self.parentCouchUrlWithAuth + "/%s/spec" % ele['RequestName'])
                specs[ele['RequestName']] = wmspec
            ele['WMSpec'] = specs[ele['RequestName']]
        del specs
        return elements

    def insertWMSpec(self, wmspec):
        """
        Insert WMSpec to backend

        Returns the result of wmspec.saveCouch().
        """
        # Can't save spec to inbox, it needs to be visible to child queues
        # Can't save empty dict so add dummy variable
        dummy_values = {'name': wmspec.name()}
        # change specUrl in spec before saving (otherwise it points to previous url)
        wmspec.setSpecUrl(self.db['host'] + "/%s/%s/spec" % (self.db.name, wmspec.name()))
        return wmspec.saveCouch(self.hostWithAuth, self.db.name, dummy_values)

    def getWMSpec(self, name):
        """Get the spec with the given name from the queue database"""
        wmspec = WMWorkloadHelper()
        wmspec.load(self.db['host'] + "/%s/%s/spec" % (self.db.name, name))
        return wmspec

    def insertElements(self, units, parent=None):
        """
        Insert elements to the database.

        @param units: elements to insert; they are assumed to share the same spec
        @param parent: the parent WorkQueueObject these elements belong to,
                       i.e. a workflow which has been split
        @return: list of the newly inserted elements (elements whose document
                 already exists are skipped); None when units is empty
        """
        if not units:
            return
        # store spec file separately - assume all elements share same spec
        self.insertWMSpec(units[0]['WMSpec'])
        newUnitsInserted = []
        for unit in units:
            # cast to couch
            if not isinstance(unit, CouchWorkQueueElement):
                unit = CouchWorkQueueElement(self.db, elementParams=dict(unit))

            if parent:
                unit['ParentQueueId'] = parent.id
                unit['TeamName'] = parent['TeamName']
                unit['WMBSUrl'] = parent['WMBSUrl']

            if unit._couch.documentExists(unit.id):
                self.logger.info('Element "%s" already exists, skip insertion.' % unit.id)
                continue
            else:
                newUnitsInserted.append(unit)
            unit.save()

        # commit through the couch handle of the last unit processed
        unit._couch.commit(all_or_nothing=True)
        return newUnitsInserted

    def createWork(self, spec, **kwargs):
        """
        Return the Inbox element for this spec.

        This does not persist it to the database.
        Extra keyword arguments become element parameters.
        """
        kwargs.update({'WMSpec': spec,
                       'RequestName': spec.name(),
                       'StartPolicy': spec.startPolicyParameters(),
                       'EndPolicy': spec.endPolicyParameters(),
                       'OpenForNewData': False})
        unit = CouchWorkQueueElement(self.inbox, elementParams=kwargs)
        unit.id = spec.name()
        return unit

    def getElements(self, status=None, elementIDs=None, returnIdOnly=False,
                    db=None, loadSpec=False, WorkflowName=None, **elementFilters):
        """
        Return elements that match requirements

        status, elementIDs & filters are 'AND'ed together to filter elements.
        returnIdOnly causes the element not to be loaded and only the id returned
        db is used to specify which database to return from (self.db by default)
        loadSpec causes the workflow for each spec to be loaded.
        WorkflowName may be used in the place of RequestName

        Raises ValueError when elementIDs is combined with other filters,
        status or returnIdOnly.
        """
        key = []
        if not db:
            db = self.db
        if elementFilters.get('RequestName') and not WorkflowName:
            WorkflowName = elementFilters.pop('RequestName')

        if elementIDs:
            if elementFilters or status or returnIdOnly:
                raise ValueError(
                    "Can't specify extra filters (or return id's) when using element id's with getElements()")
            elements = [CouchWorkQueueElement(db, i).load() for i in elementIDs]
        else:
            options = {'include_docs': True,
                       'filter': elementFilters,
                       'idOnly': returnIdOnly,
                       'reduce': False}
            # filter on workflow or status if possible
            filterName = 'elementsByWorkflow'
            if WorkflowName:
                key.append(WorkflowName)
            elif status:
                filterName = 'elementsByStatus'
                key.append(status)
            elif elementFilters.get('SubscriptionId'):
                key.append(elementFilters['SubscriptionId'])
                filterName = 'elementsBySubscription'
            # add given params to filters
            if status:
                options['filter']['Status'] = status
            if WorkflowName:
                options['filter']['RequestName'] = WorkflowName

            view = db.loadList('WorkQueue', 'filter', filterName, options, key)
            view = json.loads(view)
            if returnIdOnly:
                return view
            elements = [CouchWorkQueueElement.fromDocument(db, row) for row in view]

        if loadSpec:
            specs = {}  # cache as may have multiple elements for same spec
            for ele in elements:
                if ele['RequestName'] not in specs:
                    wmspec = self.getWMSpec(ele['RequestName'])
                    specs[ele['RequestName']] = wmspec
                ele['WMSpec'] = specs[ele['RequestName']]
            del specs
        return elements

    def getInboxElements(self, *args, **kwargs):
        """
        Return elements from Inbox, supports same semantics as getElements()
        """
        return self.getElements(*args, db=self.inbox, **kwargs)

    def getElementsForWorkflow(self, workflow):
        """Get elements for a workflow"""
        elements = self.db.loadView('WorkQueue', 'elementsByWorkflow',
                                    {'key': workflow, 'include_docs': True, 'reduce': False})
        return [CouchWorkQueueElement.fromDocument(self.db, x['doc'])
                for x in elements.get('rows', [])]

    def getElementsForParent(self, parent):
        """Get elements with the given parent"""
        elements = self.db.loadView('WorkQueue', 'elementsByParent',
                                    {'key': parent.id, 'include_docs': True})
        return [CouchWorkQueueElement.fromDocument(self.db, x['doc'])
                for x in elements.get('rows', [])]

    def saveElements(self, *elements):
        """
        Persist elements

        Returns elements successfully saved, user must verify to catch errors.
        Failures reported by couch are logged as errors.
        """
        result = []
        if not elements:
            return result
        for element in elements:
            element.save()
        # commit through the couch handle of the first element
        answer = elements[0]._couch.commit()
        result, failures = formatReply(answer, *elements)
        msg = 'Couch error saving element: "%s", error "%s", reason "%s"'
        for failed in failures:
            self.logger.error(msg % (failed['id'], failed['error'], failed['reason']))
        return result

    def _raiseConflictErrorAndLog(self, conflictIDs, updatedParams, dbName="workqueue"):
        """
        Log the ids and parameters of a conflicting update and raise
        WorkQueueError with the same message.
        """
        errorMsg = "Need to update this element manually from %s\n ids:%s\n, parameters:%s\n" % (
            dbName, conflictIDs, updatedParams)
        self.logger.error(errorMsg)
        raise WorkQueueError(errorMsg)

    def updateElements(self, *elementIds, **updatedParams):
        """
        Update given elements (identified by id) with new parameters

        Raises WorkQueueError when the bulk update reports conflicts.
        """
        if not elementIds:
            return
        eleParams = {}
        eleParams[self.eleKey] = updatedParams
        conflictIDs = self.db.updateBulkDocumentsWithConflictHandle(elementIds, eleParams)
        if conflictIDs:
            self._raiseConflictErrorAndLog(conflictIDs, updatedParams)
        return

    def updateInboxElements(self, *elementIds, **updatedParams):
        """
        Update given inbox elements (identified by id) with new parameters

        Raises WorkQueueError when the bulk update reports conflicts.
        """
        if not elementIds:
            return
        eleParams = {}
        eleParams[self.eleKey] = updatedParams
        conflictIDs = self.inbox.updateBulkDocumentsWithConflictHandle(elementIds, eleParams)
        if conflictIDs:
            self._raiseConflictErrorAndLog(conflictIDs, updatedParams, "workqueue_inbox")
        return

    def deleteElements(self, *elements):
        """
        Delete elements, then delete the spec document of every workflow
        that has no element left in the queue database.
        """
        if not elements:
            return
        specs = {}
        for i in elements:
            i.delete()
            specs[i['RequestName']] = None
        answer = elements[0]._couch.commit()
        _, failures = formatReply(answer, *elements)
        msg = 'Couch error deleting element: "%s", error "%s", reason "%s"'
        for failed in failures:
            # only count delete as failed if document still exists
            if elements[0]._couch.documentExists(failed['id']):
                self.logger.error(msg % (failed['id'], failed['error'], failed['reason']))
        # delete specs if no longer used
        for wf in specs:
            try:
                if not self.db.loadView('WorkQueue', 'elementsByWorkflow',
                                        {'key': wf, 'limit': 1, 'reduce': False})['rows']:
                    self.db.delete_doc(wf)
            except CouchNotFoundError:
                pass

    def availableWork(self, thresholds, siteJobCounts, team=None, wfs=None,
                      excludeWorkflows=None, numElems=9999999):
        """
        Get work which is available to be run

        Assume thresholds is a dictionary; keys are the site name, values are
        the maximum number of running jobs at that site.

        Assumes siteJobCounts is a dictionary-of-dictionaries; keys are the
        site name and task priorities. The value is the number of jobs
        running at that priority. It is updated in place with the jobs of
        every element that is accepted.

        It will pull work until it reaches the number of elements configured
        (numElems). Since it's also used for calculating free resources,
        default it to "infinity"

        Note: this method will be called with no limit of work elements when
        it's simply calculating the resources available (based on what is in
        LQ), before it gets work from GQ

        Returns the tuple (elements, thresholds, siteJobCounts).
        """
        self.logger.info("Getting up to %d available work from %s", numElems, self.queueUrl)
        excludeWorkflows = excludeWorkflows or []
        elements = []
        sortedElements = []

        # We used to pre-filter sites, looking to see if there are idle job slots
        # We don't do this anymore, as we may over-allocate
        # jobs to sites if the new jobs have a higher priority.

        # If there are no sites, punt early.
        if not thresholds:
            self.logger.error("No thresholds is set: Please check")
            return elements, thresholds, siteJobCounts

        options = {}
        options['include_docs'] = True
        options['descending'] = True
        options['resources'] = thresholds
        if team:
            options['team'] = team
            self.logger.info("setting team to %s" % team)
        if wfs:
            result = []
            # query in slices of 20 workflows; range (not xrange) so that this
            # also runs on Python 3
            for i in range(0, len(wfs), 20):
                options['wfs'] = wfs[i:i + 20]
                data = self.db.loadList('WorkQueue', 'workRestrictions', 'availableByPriority', options)
                result.extend(json.loads(data))
        else:
            result = self.db.loadList('WorkQueue', 'workRestrictions', 'availableByPriority', options)
            result = json.loads(result)
            if len(result) == 0:
                self.logger.info(
                    """No available work in WQ or didn't pass workqueue restriction - check Pileup, site white list, etc""")
            self.logger.debug("Available Work:\n %s \n for resources\n %s" % (result, thresholds))

        # Iterate through the results; apply whitelist / blacklist / data
        # locality restrictions. Only assign jobs if they are high enough
        # priority.
        for i in result:
            element = CouchWorkQueueElement.fromDocument(self.db, i)
            # filter out the excluded workflows
            if element['RequestName'] not in excludeWorkflows:
                sortedElements.append(element)

        # sort elements to get them in priority first and timestamp order
        # (two stable sorts: creation time ascending within priority descending)
        sortedElements.sort(key=lambda element: element['CreationTime'])
        sortedElements.sort(key=lambda x: x['Priority'], reverse=True)

        for element in sortedElements:
            if numElems <= 0:
                self.logger.info("Reached the maximum number of elements to be pulled: %d", len(elements))
                break

            if not possibleSites(element):
                self.logger.info("No possible sites for %s with doc id %s",
                                 element['RequestName'], element.id)
                continue

            prio = element['Priority']
            possibleSite = None
            # random.shuffle needs a mutable sequence; a dict view (Python 3)
            # is not one, so copy the site names into a list first
            sites = list(thresholds)
            random.shuffle(sites)
            for site in sites:
                if element.passesSiteRestriction(site):
                    # Count the number of jobs currently running of greater priority
                    curJobCount = sum([x[1] if x[0] >= prio else 0
                                       for x in siteJobCounts.get(site, {}).items()])
                    self.logger.debug("Job Count: %s, site: %s thresholds: %s" %
                                      (curJobCount, site, thresholds[site]))
                    if curJobCount < thresholds[site]:
                        possibleSite = site
                        break

            if possibleSite:
                numElems -= 1
                self.logger.debug("Possible site exists %s" % str(possibleSite))
                elements.append(element)
                if possibleSite not in siteJobCounts:
                    siteJobCounts[possibleSite] = {}
                siteJobCounts[possibleSite][prio] = siteJobCounts[possibleSite].setdefault(prio, 0) + \
                                                    element['Jobs'] * element.get('blowupFactor', 1.0)
            else:
                self.logger.debug("No available resources for %s with doc id %s",
                                  element['RequestName'], element.id)

        return elements, thresholds, siteJobCounts

    def getActiveData(self):
        """Get data items we have work in the queue for"""
        data = self.db.loadView('WorkQueue', 'activeData', {'reduce': True, 'group': True})
        return [{'dbs_url': x['key'][0], 'name': x['key'][1]} for x in data.get('rows', [])]

    def getActiveParentData(self):
        """Get data items we have work in the queue for with parent"""
        data = self.db.loadView('WorkQueue', 'activeParentData', {'reduce': True, 'group': True})
        return [{'dbs_url': x['key'][0], 'name': x['key'][1]} for x in data.get('rows', [])]

    def getActivePileupData(self):
        """Get data items we have work in the queue for with pileup"""
        data = self.db.loadView('WorkQueue', 'activePileupData', {'reduce': True, 'group': True})
        return [{'dbs_url': x['key'][0], 'name': x['key'][1]} for x in data.get('rows', [])]

    def getElementsForData(self, data):
        """Get active elements for this dbs & data combo"""
        elements = self.db.loadView('WorkQueue', 'elementsByData',
                                    {'key': data, 'include_docs': True})
        return [CouchWorkQueueElement.fromDocument(self.db, x['doc'])
                for x in elements.get('rows', [])]

    def getElementsForParentData(self, data):
        """Get active elements for this parent data"""
        elements = self.db.loadView('WorkQueue', 'elementsByParentData',
                                    {'key': data, 'include_docs': True})
        return [CouchWorkQueueElement.fromDocument(self.db, x['doc'])
                for x in elements.get('rows', [])]

    def getElementsForPileupData(self, data):
        """Get active elements for this pileup data"""
        elements = self.db.loadView('WorkQueue', 'elementsByPileupData',
                                    {'key': data, 'include_docs': True})
        return [CouchWorkQueueElement.fromDocument(self.db, x['doc'])
                for x in elements.get('rows', [])]

    def isAvailable(self):
        """
        Is the server available, i.e. up and not compacting

        Returns False (and logs the reason) when the database info cannot be
        read or reports a running compaction.
        """
        try:
            compacting = self.db.info()['compact_running']
            if compacting:
                self.logger.info("CouchDB compacting - try again later.")
                return False
        except Exception as ex:
            self.logger.error("CouchDB unavailable: %s" % str(ex))
            return False
        return True

    def getWorkflows(self, includeInbox=False, includeSpecs=False):
        """
        Returns workflows known to workqueue

        includeInbox adds the workflows of the inbox elements,
        includeSpecs adds the workflows of the 'specsByWorkflow' view.
        """
        result = set([x['key'] for x in
                      self.db.loadView('WorkQueue', 'elementsByWorkflow', {'group': True})['rows']])
        if includeInbox:
            result = result | set([x['key'] for x in
                                   self.inbox.loadView('WorkQueue', 'elementsByWorkflow',
                                                       {'group': True})['rows']])
        if includeSpecs:
            result = result | set([x['key'] for x in
                                   self.db.loadView('WorkQueue', 'specsByWorkflow')['rows']])
        return list(result)

    def queueLength(self):
        """Return number of available elements"""
        return self.db.loadView('WorkQueue', 'availableByPriority', {'limit': 0})['total_rows']

    def fixConflicts(self):
        """
        Fix elements in conflict

        Each local queue runs this to resolve its conflicts with global,
        resolution propagates up to global.

        Conflicting elements are merged into one element with others deleted.

        This will fail if elements are modified during the resolution -
        if this happens rerun. Failures are logged per element and not raised.
        """
        for db in [self.inbox, self.db]:
            for row in db.loadView('WorkQueue', 'conflicts')['rows']:
                element_id = row['id']
                try:
                    conflicting_elements = [CouchWorkQueueElement.fromDocument(db, db.document(element_id, rev))
                                            for rev in row['value']]
                    fixed_elements = fixElementConflicts(*conflicting_elements)
                    if self.saveElements(fixed_elements[0]):
                        # delete others (if merged value update accepted)
                        self.saveElements(*fixed_elements[1:])
                except Exception as ex:
                    self.logger.error("Error resolving conflict for %s: %s" % (element_id, str(ex)))

    def recordTaskActivity(self, taskname, comment=''):
        """
        Record a task for monitoring

        Stores the current time and the comment under the task name in the
        'task_activity' document; a failing commit is logged, not raised.
        """
        try:
            record = self.db.document('task_activity')
        except CouchNotFoundError:
            record = Document('task_activity')
        record.setdefault('tasks', {})
        record['tasks'].setdefault(taskname, {})
        record['tasks'][taskname]['timestamp'] = time.time()
        record['tasks'][taskname]['comment'] = comment
        try:
            self.db.commitOne(record)
        except Exception as ex:
            self.logger.error("Unable to update task %s freshness: %s" % (taskname, str(ex)))

    def getWMBSInjectStatus(self, request=None):
        """
        This service only provided by global queue except on draining agent

        With a request name: returns its injection status combined with
        "not open for new data"; raises WorkQueueNoMatchingElements when the
        view has no row for it.
        Without: returns a list of {request: status} dictionaries.
        """
        options = {'group': True, 'reduce': True}
        if request:
            options.update(key=request)
        data = self.db.loadView('WorkQueue', 'wmbsInjectStatusByRequest', options)
        if request:
            if data['rows']:
                injectionStatus = data['rows'][0]['value']
                inboxElement = self.getInboxElements(WorkflowName=request)
                requestOpen = inboxElement[0].get('OpenForNewData', False) if inboxElement else False
                return injectionStatus and not requestOpen
            else:
                raise WorkQueueNoMatchingElements("%s not found" % request)
        else:
            injectionStatus = dict((x['key'], x['value']) for x in data.get('rows', []))
            finalInjectionStatus = []
            for request in injectionStatus.keys():
                inboxElement = self.getInboxElements(WorkflowName=request)
                requestOpen = inboxElement[0].get('OpenForNewData', False) if inboxElement else False
                finalInjectionStatus.append({request: injectionStatus[request] and not requestOpen})
            return finalInjectionStatus

    def getWorkflowNames(self, inboxFlag=False):
        """Get workflow names from workqueue db (from the inbox when inboxFlag is set)"""
        if inboxFlag:
            db = self.inbox
        else:
            db = self.db
        data = db.loadView('WorkQueue', 'elementsByWorkflow',
                           {'stale': "update_after", 'reduce': True, 'group': True})
        return [x['key'] for x in data.get('rows', [])]

    def deleteWQElementsByWorkflow(self, workflowNames):
        """
        delete workqueue elements belonging to the given workflow names
        (a single name or a list) from both the queue and the inbox.
        It doesn't check the status of the workflows, so be careful when
        using this: pass only workflows which have reached an end status.

        Returns the number of element documents passed to the bulk delete.
        """
        deleted = 0
        dbs = [self.db, self.inbox]
        if not isinstance(workflowNames, list):
            workflowNames = [workflowNames]

        if len(workflowNames) == 0:
            return deleted
        options = {}
        options["stale"] = "update_after"
        options["reduce"] = False
        for couchdb in dbs:
            result = couchdb.loadView("WorkQueue", "elementsByWorkflow", options, workflowNames)
            ids = []
            for entry in result["rows"]:
                ids.append(entry["id"])
            if ids:
                couchdb.bulkDeleteByIDs(ids)
                deleted += len(ids)
        # delete the workflow with spec from workqueue db
        for wf in workflowNames:
            self.db.delete_doc(wf)
        return deleted
def main(options):
    """
    Populate the couch database at options.dburl with generated data.

    Depending on the flags set on options this installs the WMStats
    couchapp (add_couchapp), queues and commits the generated reqmgr
    request documents (add_reqmgr_data), and queues and commits the
    generated agent request documents together with their job summary
    documents (add_agent_data). The number of documents added is printed.
    """
    url, dbName = splitCouchServiceURL(options.dburl)
    db = CouchServer(url).connectDatabase(dbName)

    reqmgr_requests = generate_reqmgr_requests(options.requests)
    agent_requests = generate_agent_requests(options.requests, options.iterations)

    if options.add_couchapp:
        installCouchApp(url, dbName, "WMStats", options.couchapp_base)

    if options.add_reqmgr_data:
        for req in reqmgr_requests:
            db.queue(req)
        db.commit()
        print("Added %s reqmgr requests" % len(reqmgr_requests))

    if options.add_agent_data:
        # count the job documents as they are queued: this is well defined
        # when there are no agent requests and correct when the requests
        # have different numbers of job documents
        jobDocCount = 0
        for req in agent_requests:
            db.queue(req)
            for job in generate_jobsummary(req['workflow']):
                db.queue(job)
                jobDocCount += 1
        db.commit()
        print("Added %s agent requests" % len(agent_requests))
        print("Added %s job Docs" % jobDocCount)
def _classifyOutputTier(wf, requestType, inputdataset, outputTiers):
    """
    Return the summary tier label for a workflow.

    :param wf: workflow name (only inspected for the 'STEP0ATCERN' marker)
    :param requestType: request type string, e.g. 'TaskChain'
    :param inputdataset: input dataset path, or an empty string when absent
    :param outputTiers: list of the data tiers of the output datasets
    :returns: the tier label; 'Unknown' when an input dataset is present but
        neither its tier nor the output tiers match a known case
    """
    if inputdataset:
        inputTier = inputdataset.split('/')[-1]
        if inputTier in ['GEN']:
            return 'LHE'
        if inputTier in ['RAW', 'RECO']:
            return 'AOD'
        if inputTier in ['GEN-SIM']:
            return 'AODSIM'
        if 'AODSIM' in outputTiers:
            return 'AODSIM'
        return 'Unknown'

    if len(outputTiers) == 1 and 'GEN' in outputTiers:
        if 'STEP0ATCERN' in wf:
            return 'STEP0'
        return 'FullGen'
    if 'GEN-SIM' in outputTiers and 'AODSIM' in outputTiers and requestType == 'TaskChain':
        return 'RelVal'
    if 'RECO' in outputTiers and requestType == 'TaskChain':
        return 'RelVal'
    if 'GEN-SIM' in outputTiers:
        return 'GEN-SIM'
    if 'AODSIM' in outputTiers:
        return 'AODSIM'
    if 'RECO' in outputTiers:
        return 'AOD'
    if 'AOD' in outputTiers:
        return 'AOD'
    return 'GEN-SIM'


def _updateMcMInfo(mcm, wf, prepID, couchDoc, log):
    """
    Store the McM approval time and total event count for prepID in couchDoc.

    :param mcm: McM client offering getHistory() and getRequest()
    :param wf: workflow name, used in log messages only
    :param prepID: PREP ID to look up
    :param couchDoc: document updated in place ('mcmApprovalTime',
        'mcmTotalEvents')
    :param log: logger-like object with info() and error()

    Errors derived from Exception are logged and swallowed so that one failing
    lookup does not abort the processing of the remaining workflows.
    """
    try:
        mcmHistory = mcm.getHistory(prepID=prepID)
        if 'mcmApprovalTime' not in couchDoc:
            couchDoc.update({'mcmApprovalTime': 'NoMcMData'})
        found = False
        for entry in mcmHistory:
            if entry['action'] == 'set status' and entry['step'] == 'announced':
                dateString = entry['updater']['submission_date']
                dt = datetime.strptime(dateString, '%Y-%m-%d-%H-%M')
                couchDoc.update({'mcmApprovalTime': time.mktime(dt.timetuple())})
                found = True
        if not found:
            log.error("History found but no approval time for %s" % wf)
    except McMNoDataError:
        log.error("Setting NoMcMData for %s" % wf)
        couchDoc.update({'mcmApprovalTime': 'NoMcMData'})
    except (RuntimeError, IOError):
        log.error("%s getting history from McM for PREP ID %s. May be transient and/or SSO problem."
                  % (sys.exc_info()[0], prepID))
    except Exception:
        log.error("%s getting history from McM for PREP ID %s. Unknown error."
                  % (sys.exc_info()[0], prepID))

    try:
        mcmRequest = mcm.getRequest(prepID=prepID)
        couchDoc.update({'mcmTotalEvents': mcmRequest.get('total_events', 'NoMcMData')})
    except (RuntimeError, IOError):
        log.error("%s getting request from McM for PREP ID %s. May be transient and/or SSO problem."
                  % (sys.exc_info()[0], prepID))
    except Exception:
        log.error("%s getting request from McM for PREP ID %s. Unknown error."
                  % (sys.exc_info()[0], prepID))


def gatherWMDataMiningStats(wmstatsUrl, reqmgrUrl, wmMiningUrl, mcmUrl, mcmCert, mcmKey, tmpDir,
                            archived=False, log=logging.info):
    """
    Collect per-workflow statistics and store them in the data mining couch DB.

    For every request returned by WMStats the existing mining document (or a
    new one) is enriched with ReqMgr, McM, status-transition, job and progress
    information; documents that changed are queued and committed at the end.

    :param wmstatsUrl: URL handed to WMStatsReader
    :param reqmgrUrl: couch URL of the ReqMgr database (server/database)
    :param wmMiningUrl: couch URL of the database the documents are written to
    :param mcmUrl: URL handed to the McM client
    :param mcmCert: certificate handed to the McM client
    :param mcmKey: key handed to the McM client
    :param tmpDir: temporary directory handed to the McM client
    :param archived: when True process the '*-archived' states without job
        information, otherwise the active states with job information
    :param log: logger-like object providing info() and error(). The default
        value (the function logging.info) offers neither, so the logging
        module itself is used in that case.
    """
    # The historical default is a plain function without .info/.error, which
    # made the very first log call fail; the logging module offers both.
    if not hasattr(log, 'info') or not hasattr(log, 'error'):
        log = logging

    server, database = splitCouchServiceURL(wmMiningUrl)
    analyticsServer = CouchServer(server)
    couchdb = analyticsServer.connectDatabase(database)

    WMStats = WMStatsReader(wmstatsUrl, reqdbURL=reqmgrUrl, reqdbCouchApp="ReqMgr")

    reqMgrServer, reqMgrDB = splitCouchServiceURL(reqmgrUrl)
    reqMgr = CouchServer(reqMgrServer).connectDatabase(reqMgrDB, False)

    if archived:
        funcName = "Archived Requests"
    else:
        funcName = "Active Requests"

    log.info("%s: Getting job information from %s and %s. Please wait." %
             (funcName, wmstatsUrl, reqmgrUrl))

    if archived:
        checkStates = ['normal-archived', 'rejected-archived', 'aborted-archived']
        jobInfoFlag = False
    else:
        checkStates = WMStatsReader.ACTIVE_STATUS
        jobInfoFlag = True
    requests = WMStats.getRequestByStatus(checkStates, jobInfoFlag=jobInfoFlag, legacyFormat=True)

    requestCollection = RequestInfoCollection(requests)
    result = requestCollection.getJSONData()
    requestsDict = requestCollection.getData()
    log.info("%s: Total %s requests retrieved\n" % (funcName, len(result)))

    # (request status, report key) pairs, in the order the keys are written
    statusTimeKeys = (
        ('assignment-approved', 'approvedTime'),
        ('assigned', 'assignedTime'),
        ('acquired', 'acquireTime'),
        ('closed-out', 'closeoutTime'),
        ('announced', 'announcedTime'),
        ('completed', 'completedTime'),
        ('new', 'newTime'),
        ('normal-archived', 'archivedTime'),
    )
    statusToKey = dict(statusTimeKeys)
    progressMilestones = (1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 98, 99, 100)

    report = {}
    nMCMCalls = 0
    with McM(cert=mcmCert, key=mcmKey, url=mcmUrl, tmpDir=tmpDir) as mcm:
        for wf in result:

            # Store a copy of the CouchDB document so we can compare later before updating
            if couchdb.documentExists(wf):
                oldCouchDoc = couchdb.document(wf)
            else:
                oldCouchDoc = CouchDoc(id=wf)

            newCouchDoc = copy.deepcopy(oldCouchDoc)
            ancientCouchDoc = copy.deepcopy(oldCouchDoc)
            # report[wf] and oldCouchDoc are the same object from here on
            report[wf] = oldCouchDoc
            wfReport = report[wf]
            # FIXME: remove report, only have two instances of couchDoc

            if 'filterEfficiency' not in oldCouchDoc or 'runWhiteList' not in oldCouchDoc:
                runWhiteList = []
                filterEfficiency = None
                try:
                    rmDoc = reqMgr.document(wf)
                    runWhiteList = rmDoc.get('RunWhiteList', [])
                    filterEfficiency = rmDoc.get('FilterEfficiency', None)
                except Exception:
                    pass  # ReqMgr no longer has the workflow
                wfReport.update({'filterEfficiency': filterEfficiency,
                                 'runWhiteList': runWhiteList})

            if oldCouchDoc.get('mcmTotalEvents', 'Unknown') == 'Unknown' or \
                    oldCouchDoc.get('mcmApprovalTime', 'Unknown') == 'Unknown':
                prepID = oldCouchDoc.get('prepID', None)
                if prepID and nMCMCalls <= maxMCMCalls:
                    log.info("Trying to update McM info for %s, PREPID %s" % (wf, prepID))
                    # Get information from McM. Don't call too many times, can take a long time
                    nMCMCalls += 1
                    _updateMcMInfo(mcm, wf, prepID, wfReport, log)

            # Basic parameters of the workflow
            priority = requests[wf].get('priority', 0)
            requestType = requests[wf].get('request_type', 'Unknown')
            targetLumis = requests[wf].get('input_lumis', 0)
            targetEvents = requests[wf].get('input_events', 0)
            campaign = requests[wf].get('campaign', 'Unknown')
            prep_id = requests[wf].get('prep_id', None)
            outputdatasets = requests[wf].get('outputdatasets', [])
            statuses = requests[wf].get('request_status', [])

            if not statuses:
                log.error("Could not find any status from workflow: %s" % wf)  # Should not happen but it does.

            # Remove a single task_ from the start of PREP ID if it exists.
            # Strings are immutable, so the result has to be assigned back.
            if prep_id and prep_id.startswith('task_'):
                prep_id = prep_id.replace('task_', '', 1)

            # Can be an empty list, full list, empty string, or non-empty string!
            inputdataset = requests[wf].get('inputdataset', "")
            if isinstance(inputdataset, list):
                if inputdataset:
                    inputdataset = inputdataset[0]
                else:
                    inputdataset = ''

            outputTiers = []
            try:
                for ds in outputdatasets:
                    if isinstance(ds, list):
                        outputTiers.append(ds[0].split('/')[-1])
                    else:
                        outputTiers.append(ds.split('/')[-1])
            except Exception:
                # Sometimes is a list of lists, not just a list. Carry on with
                # whatever tiers were decoded before the failure.
                log.error("Could not decode outputdatasets: %s" % outputdatasets)

            outputTier = _classifyOutputTier(wf, requestType, inputdataset, outputTiers)

            # Calculate completion ratios for events and lumi sections, take minimum for all datasets.
            # 200 is a sentinel above any real percentage; it is reset to 0 below.
            eventPercent = 200
            lumiPercent = 200
            datasetReports = requestsDict[wf].getProgressSummaryByOutputDataset()
            for dataset in datasetReports:
                dsr = datasetReports[dataset].getReport()
                events = dsr.get('events', 0)
                lumis = dsr.get('totalLumis', 0)
                if targetLumis:
                    lumiPercent = min(lumiPercent, lumis / targetLumis * 100)
                if targetEvents:
                    eventPercent = min(eventPercent, events / targetEvents * 100)
            if eventPercent > 100:
                eventPercent = 0
            if lumiPercent > 100:
                lumiPercent = 0

            # Sum up all jobs across agents to see if we've run the first, last
            successJobs = 0
            totalJobs = 0
            for agent in result[wf]:
                jobs = result[wf][agent]
                successJobs += jobs['sucess']  # key is spelled this way in the data
                totalJobs += jobs['created']
            try:
                if totalJobs and not wfReport.get('firstJobTime', None):
                    wfReport.update({'firstJobTime': int(time.time())})
                if totalJobs and successJobs == totalJobs and not wfReport.get('lastJobTime', None):
                    wfReport.update({'lastJobTime': int(time.time())})
            except Exception:
                pass

            # Figure out current status of workflow and transition times;
            # the last entry of a given status wins.
            finalStatus = None
            transitionTimes = {}
            for status in statuses:
                finalStatus = status['status']
                if finalStatus in statusToKey:
                    transitionTimes[statusToKey[finalStatus]] = status['update_time']

            # Build or modify the report dictionary for the WF; transition
            # times already recorded are never overwritten.
            for dummyStatus, timeKey in statusTimeKeys:
                transitionTime = transitionTimes.get(timeKey)
                if transitionTime and not wfReport.get(timeKey, None):
                    wfReport.update({timeKey: transitionTime})

            try:
                dt = requests[wf]['request_date']
                requestDate = '%4.4d-%2.2d-%2.2d %2.2d:%2.2d:%2.2d' % tuple(dt)
                wfReport.update({'requestDate': requestDate})
            except Exception:
                pass

            wfReport.update({'priority': priority, 'status': finalStatus, 'type': requestType})
            wfReport.update({'totalLumis': targetLumis, 'totalEvents': targetEvents})
            wfReport.update({'campaign': campaign, 'prepID': prep_id, 'outputTier': outputTier})
            wfReport.update({'outputDatasets': outputdatasets, 'inputDataset': inputdataset})

            wfReport.setdefault('lumiPercents', {})
            wfReport.setdefault('eventPercents', {})

            # Record the first time each progress milestone was reached
            lumiProgress = 0
            eventProgress = 0
            for percentage in progressMilestones:
                percent = str(percentage)
                percentReported = wfReport['lumiPercents'].get(percent, None)
                if not percentReported and lumiPercent >= percentage:
                    wfReport['lumiPercents'][percent] = int(time.time())
                if lumiPercent >= percentage:
                    lumiProgress = percentage

                percentReported = wfReport['eventPercents'].get(percent, None)
                if not percentReported and eventPercent >= percentage:
                    wfReport['eventPercents'][percent] = int(time.time())
                if eventPercent >= percentage:
                    eventProgress = percentage

            wfReport.update({'eventProgress': eventProgress, 'lumiProgress': lumiProgress})

            newCouchDoc.update(wfReport)

            # Queue the updated document for addition if it's changed.
            if ancientCouchDoc != newCouchDoc:
                try:
                    newCouchDoc['updateTime'] = int(time.time())
                    wfReport['updateTime'] = int(time.time())
                    json.dumps(newCouchDoc)  # Make sure it encodes before trying to queue
                    couchdb.queue(newCouchDoc)
                except Exception:
                    # pformat returns the text; pprint would print it and yield None
                    log.error("Failed to queue document:%s \n" % pprint.pformat(newCouchDoc))

    log.info("%s: Finished getting job. wait for the next Cycle" % funcName)
    # Commit all changes to CouchDB
    couchdb.commit()
def __init__(self, config):
    """
    __init__

    Instantiate every DAO, service client and bookkeeping container
    this class works with.
    """
    WMConnectionBase.__init__(self, "WMCore.WMBS")
    currentThread = threading.currentThread()
    self.dbsDaoFactory = DAOFactory(package="WMComponent.DBS3Buffer",
                                    logger=currentThread.logger,
                                    dbinterface=currentThread.dbi)

    # WMBS DAOs: (attribute name, DAO class name), created in this order
    wmbsDaos = (
        ("getOutputMapAction", "Jobs.GetOutputMap"),
        ("bulkAddToFilesetAction", "Fileset.BulkAddByLFN"),
        ("bulkParentageAction", "Files.AddBulkParentage"),
        ("getJobTypeAction", "Jobs.GetType"),
        ("getParentInfoAction", "Files.GetParentInfo"),
        ("setParentageByJob", "Files.SetParentageByJob"),
        ("setParentageByMergeJob", "Files.SetParentageByMergeJob"),
        ("setFileRunLumi", "Files.AddRunLumi"),
        ("setFileLocation", "Files.SetLocationByLFN"),
        ("setFileAddChecksum", "Files.AddChecksumByLFN"),
        ("addFileAction", "Files.Add"),
        ("jobCompleteInput", "Jobs.CompleteInput"),
        ("setBulkOutcome", "Jobs.SetOutcomeBulk"),
        ("getWorkflowSpec", "Workflow.GetSpecAndNameFromTask"),
        ("getJobInfoByID", "Jobs.LoadFromID"),
        ("getFullJobInfo", "Jobs.LoadForErrorHandler"),
        ("getJobTaskNameAction", "Jobs.GetFWJRTaskName"),
    )
    for attrName, daoName in wmbsDaos:
        setattr(self, attrName, self.daofactory(classname=daoName))

    # This DAO is executed right away; only its result is kept
    pnnToPsnDao = self.daofactory(classname="Locations.GetPNNtoPSNMapping")
    self.pnn_to_psn = pnnToPsnDao.execute()

    # DBS buffer DAOs: (attribute name, DAO class name), created in this order
    dbsBufferDaos = (
        ("dbsStatusAction", "DBSBufferFiles.SetStatus"),
        ("dbsParentStatusAction", "DBSBufferFiles.GetParentStatus"),
        ("dbsChildrenAction", "DBSBufferFiles.GetChildren"),
        ("dbsCreateFiles", "DBSBufferFiles.Add"),
        ("dbsSetLocation", "DBSBufferFiles.SetLocationByLFN"),
        ("dbsInsertLocation", "DBSBufferFiles.AddLocation"),
        ("dbsSetChecksum", "DBSBufferFiles.AddChecksumByLFN"),
        ("dbsSetRunLumi", "DBSBufferFiles.AddRunLumi"),
        ("dbsGetWorkflow", "ListWorkflow"),
        ("dbsLFNHeritage", "DBSBufferFiles.BulkHeritageParent"),
    )
    for attrName, daoName in dbsBufferDaos:
        setattr(self, attrName, self.dbsDaoFactory(classname=daoName))

    self.stateChanger = ChangeState(config)

    accountantConfig = config.JobAccountant
    # Whether the job report is attached to the value handed back
    self.returnJobReport = getattr(accountantConfig, 'returnReportFromWorker', False)
    # Where the specs for DBS are stored
    self.specDir = getattr(accountantConfig, 'specDir', None)
    # Largest RAW EDM size accepted for Repack output; above it the data
    # goes to the Error dataset and skips PromptReco
    defaultMaxRepackSize = 12 * 1024 * 1024 * 1024
    self.maxAllowedRepackOutputSize = getattr(accountantConfig,
                                              'maxAllowedRepackOutputSize',
                                              defaultMaxRepackSize)

    # ACDC service
    self.dataCollection = DataCollectionService(url=config.ACDC.couchurl,
                                                database=config.ACDC.database)

    # FWJR couch database and local WMStats writer
    stateMachineConfig = config.JobStateMachine
    couchServer = CouchServer(sanitizeURL(stateMachineConfig.couchurl)['url'])
    self.fwjrCouchDB = couchServer.connectDatabase("%s/fwjrs" % stateMachineConfig.couchDBName)
    self.localWMStats = WMStatsWriter(config.TaskArchiver.localWMStatsURL,
                                      appName="WMStatsAgent")

    # Containers holding data until it is committed
    self.dbsFilesToCreate = []
    self.wmbsFilesToBuild = []
    self.wmbsMergeFilesToBuild = []
    self.fileLocation = None
    self.mergedOutputFiles = []
    self.listOfJobsToSave = []
    self.listOfJobsToFail = []
    self.filesetAssoc = []
    self.parentageBinds = []
    self.parentageBindsForMerge = []
    self.jobsWithSkippedFiles = {}
    self.count = 0

    # Bounded caches and location bookkeeping
    self.datasetAlgoID = collections.deque(maxlen=1000)
    self.datasetAlgoPaths = collections.deque(maxlen=1000)
    self.dbsLocations = set()
    self.workflowIDs = collections.deque(maxlen=1000)
    self.workflowPaths = collections.deque(maxlen=1000)

    self.phedex = PhEDEx()
    self.locLists = self.phedex.getNodeMap()
class RequestDBReader():
    """
    Query helper for request documents stored in a couch database, using the
    views of one couchapp ("ReqMgr" unless told otherwise).
    """

    def __init__(self, couchURL, couchapp="ReqMgr"):
        """
        :param couchURL: URL of the couch database (server/database); it is
            passed through sanitizeURL before use
        :param couchapp: name of the couchapp whose views are queried
        """
        couchURL = sanitizeURL(couchURL)['url']
        # set the connection for local couchDB call
        self._commonInit(couchURL, couchapp)

    def _commonInit(self, couchURL, couchapp):
        """
        Set up the common variables for inherited classes.
        Inherited classes should call this in their init function.

        :param couchURL: either a Database instance, which is reused, or a
            couch URL that is split into server URL and database name
        :param couchapp: name of the couchapp whose views are queried
        """
        if isinstance(couchURL, Database):
            self.couchDB = couchURL
            self.couchURL = self.couchDB['host']
            self.dbName = self.couchDB.name
            self.couchServer = CouchServer(self.couchURL)
        else:
            self.couchURL, self.dbName = splitCouchServiceURL(couchURL)
            self.couchServer = CouchServer(self.couchURL)
            self.couchDB = self.couchServer.connectDatabase(self.dbName, False)
        self.couchapp = couchapp
        self.defaultStale = {"stale": "update_after"}

    def setDefaultStaleOptions(self, options):
        """
        Return options with the default 'stale' setting added when the caller
        did not set one. A dict passed in is updated in place; a falsy value
        is replaced by a new dict.
        """
        if not options:
            options = {}
        if 'stale' not in options:
            options.update(self.defaultStale)
        return options

    def _setNoStale(self):
        """
        Use this only for the unittest
        """
        self.defaultStale = {}

    def _getCouchView(self, view, options, keys=None):
        """
        Load a view of the configured couchapp.

        :param view: view name
        :param options: view options (default stale option is added)
        :param keys: a single key string or a list of keys; None means no keys
        """
        options = self.setDefaultStaleOptions(options)
        if keys is None:
            keys = []
        elif keys and isinstance(keys, basestring):
            keys = [keys]
        return self.couchDB.loadView(self.couchapp, view, options, keys)

    def _filterCouchInfo(self, couchInfo):
        """Remove the couch specific fields from couchInfo, in place."""
        for key in ['_rev', '_attachments']:
            if key in couchInfo:
                del couchInfo[key]
        return

    def _formatCouchData(self, data, key="id", detail=True, filterCouch=True, returnDict=False):
        """
        Convert a couch view/allDocs answer into a dict or a list of keys.

        :param data: couch answer with a 'rows' list
        :param key: row field used as the key of the result
        :param detail: when True the dict is returned
        :param filterCouch: strip couch specific fields from included docs
        :param returnDict: return the dict even when detail is False
        :returns: {row[key]: doc or value}; a list of the keys when neither
            detail nor returnDict is set. Rows carrying 'error' are skipped.
        """
        result = {}
        for row in data['rows']:
            if 'error' in row:
                continue
            if "doc" in row:
                if filterCouch:
                    self._filterCouchInfo(row["doc"])
                result[row[key]] = row["doc"]
            else:
                result[row[key]] = row["value"]
        if detail or returnDict:
            return result
        # always a real list, also where dict.keys() is a view
        return list(result)

    def _getRequestByNames(self, requestNames, detail):
        """
        'requestNames': list of request names (document ids)
        'detail': whether the documents are included in the answer
        """
        options = {}
        options["include_docs"] = detail
        result = self.couchDB.allDocs(options, requestNames)
        return result

    def _getRequestByStatus(self, statusList, detail, limit, skip):
        """
        'statusList': list of the status
        'limit', 'skip': passed to the view when not None
        """
        options = {}
        options["include_docs"] = detail
        if limit is not None:
            options["limit"] = limit
        if skip is not None:
            options["skip"] = skip
        keys = statusList
        return self._getCouchView("bystatus", options, keys)

    def _getRequestByStatusAndStartTime(self, status, detail, endTime):
        """
        'status': is the status of the workflow
        'endTime': upper bound of the time range; the range starts at 0
        """
        options = {}
        options["include_docs"] = detail
        options["startkey"] = [status, 0]
        options["endkey"] = [status, endTime]
        options["descending"] = False
        return self._getCouchView("bystatusandtime", options)

    def _getRequestByTeamAndStatus(self, team, status, limit):
        """
        'team': team name; a falsy value selects all teams
        'status': is the status of the workflow; a falsy value selects all
                  statuses of the team
        'limit': maximum number of rows, ignored when falsy
        """
        options = {}
        if limit:
            options["limit"] = limit
        if team and status:
            options["key"] = [team, status]
        elif team and not status:
            # An empty object sorts after every string in CouchDB view
            # collation, so it closes the range whatever falsy value the
            # caller used for status.
            options["startkey"] = [team]
            options["endkey"] = [team, {}]
        return self._getCouchView("byteamandstatus", options)

    def _getAllDocsByIDs(self, ids, include_docs=True):
        """
        keys is [id, ....]
        returns document
        """
        if not ids:
            return []
        options = {}
        options["include_docs"] = include_docs
        result = self.couchDB.allDocs(options, ids)
        return result

    def getDBInstance(self):
        """Return the underlying couch database object."""
        return self.couchDB

    def getRequestByNames(self, requestNames, detail=True):
        """
        Return the requests with the given names (a single name string or a
        list), formatted by _formatCouchData. No names yield an empty dict.
        """
        if isinstance(requestNames, basestring):
            requestNames = [requestNames]
        if len(requestNames) == 0:
            return {}
        data = self._getRequestByNames(requestNames, detail=detail)
        requestInfo = self._formatCouchData(data, detail=detail)
        return requestInfo

    def getRequestByStatus(self, statusList, detail=False, limit=None, skip=None):
        """Return the requests whose status is in statusList."""
        data = self._getRequestByStatus(statusList, detail, limit, skip)
        requestInfo = self._formatCouchData(data, detail=detail)
        return requestInfo

    def getRequestByStatusAndStartTime(self, status, detail=False, endTime=0):
        """
        Return the requests in the given status. With endTime == 0 the query
        is by status only; otherwise it is limited to the range [0, endTime].
        """
        if endTime == 0:
            data = self._getRequestByStatus([status], detail, limit=None, skip=None)
        else:
            data = self._getRequestByStatusAndStartTime(status, detail, endTime)

        requestInfo = self._formatCouchData(data, detail=detail)
        return requestInfo

    def getRequestByTeamAndStatus(self, team, status, detail=False, limit=None):
        """
        'team': team name in which the workflow was assigned to.
        'status': a single status string.

        Returns None when only a status is given, as that cannot be queried.
        """
        if team and status:
            data = self._getRequestByTeamAndStatus(team, status, limit)
        elif team and not status:
            data = self._getRequestByTeamAndStatus(team, status={}, limit=limit)
        elif not team and not status:
            data = self._getRequestByTeamAndStatus(team={}, status={}, limit=limit)
        else:
            # nothing we can do with status only
            return None

        requestInfo = self._formatCouchData(data, detail=detail)
        return requestInfo

    def getRequestByCouchView(self, view, options, keys=None, returnDict=True):
        """
        Query an arbitrary view of the couchapp. 'include_docs' defaults to
        True; note that the options dict passed in is updated in place.
        """
        options.setdefault("include_docs", True)
        data = self._getCouchView(view, options, keys)
        requestInfo = self._formatCouchData(data, returnDict=returnDict)
        return requestInfo

    def getStatusAndTypeByRequest(self, requestNames):
        """
        Query the "byrequest" view for the given request names (a single name
        string or a list) and return the rows as a dict.
        """
        if isinstance(requestNames, basestring):
            requestNames = [requestNames]
        if len(requestNames) == 0:
            return {}
        data = self._getCouchView("byrequest", {}, requestNames)
        requestInfo = self._formatCouchData(data, returnDict=True)
        return requestInfo
def testF_TaskChain(self):
    """
    _TaskChain_

    Exercise the TaskChain workflow end to end. This is a long one.

    NOTE: the workflow is complicated enough that the test only reuses the
    setup from TaskChain_t and verifies that an actual request is produced.
    """
    configDatabase = CouchServer(os.environ["COUCHURL"]).connectDatabase(self.couchDBName)
    generatorDoc = makeGeneratorConfig(configDatabase)
    processorDocs = makeProcessingConfigs(configDatabase)

    userName = '******'
    groupName = 'Li'
    teamName = 'Tang'

    schema = utils.getSchema(userName=userName)
    schema["CouchURL"] = os.environ["COUCHURL"]
    schema["CouchDBName"] = self.couchDBName
    schema["CouchWorkloadDBName"] = self.couchDBName
    schema["SiteWhitelist"] = ["T1_CH_CERN", "T1_US_FNAL"]
    schema["TaskChain"] = 5

    # One dictionary per task of the chain
    genSimTask = {
        "TaskName": "GenSim",
        "ConfigCacheID": generatorDoc,
        "SplittingAlgo": "EventBased",
        "EventsPerJob": 250,
        "RequestNumEvents": 10000,
        "PrimaryDataset": "RelValTTBar",
    }
    digiHltTask = {
        "TaskName": "DigiHLT",
        "InputTask": "GenSim",
        "InputFromOutputModule": "writeGENSIM",
        "ConfigCacheID": processorDocs['DigiHLT'],
        "SplittingAlgo": "FileBased",
    }
    recoTask = {
        "TaskName": "Reco",
        "InputTask": "DigiHLT",
        "InputFromOutputModule": "writeRAWDIGI",
        "ConfigCacheID": processorDocs['Reco'],
        "SplittingAlgo": "FileBased",
    }
    alcaRecoTask = {
        "TaskName": "ALCAReco",
        "InputTask": "Reco",
        "InputFromOutputModule": "writeALCA",
        "ConfigCacheID": processorDocs['ALCAReco'],
        "SplittingAlgo": "FileBased",
    }
    skimsTask = {
        "TaskName": "Skims",
        "InputTask": "Reco",
        "InputFromOutputModule": "writeRECO",
        "ConfigCacheID": processorDocs['Skims'],
        "SplittingAlgo": "FileBased",
        "FilesPerJob": 10,
    }
    taskChain = {
        "Task1": genSimTask,
        "Task2": digiHltTask,
        "Task3": recoTask,
        "Task4": alcaRecoTask,
        "Task5": skimsTask,
    }
    schema.update(taskChain)

    setupArgs = utils.getAndSetupSchema(self,
                                        userName=userName,
                                        groupName=groupName,
                                        teamName=teamName)
    schema.update(setupArgs)

    # These have to be set once every other update of the schema is done,
    # otherwise they get overwritten
    schema['RequestType'] = "TaskChain"
    schema["CouchDBName"] = self.couchDBName
    schema["CouchURL"] = os.environ.get("COUCHURL")
    schema["CouchWorkloadDBName"] = self.couchDBName

    putResult = self.jsonSender.put('request', schema)
    requestName = putResult[0]['RequestName']

    getResult = self.jsonSender.get('request/%s' % requestName)
    request = getResult[0]
    self.assertEqual(request['CMSSWVersion'], schema['CMSSWVersion'])
    self.assertEqual(request['Group'], groupName)
    self.assertEqual(request['Requestor'], userName)

    workload = self.loadWorkload(requestName)
    self.assertEqual(workload.data.request.schema.Task1["EventsPerJob"], 250)