def __init__(self, msConfig, **kwargs):
    """
    Provides setup for MSTransferor and MSMonitor classes

    Every service handle (reqmgr2, reqmgrAux, phedex, rucio) is always
    defined on the instance; a service that is skipped is left as None.

    :param msConfig: MS service configuration, read by key (e.g. 'reqmgr2Url')
    :param kwargs: can be used to skip the initialization of specific services, such as:
        logger: logger object
        skipReqMgr: boolean to skip ReqMgr initialization
        skipReqMgrAux: boolean to skip ReqMgrAux initialization
        skipRucio: boolean to skip Rucio initialization
    """
    # NOTE(review): 'verbose' is read as an attribute while all other settings
    # are read by key; for a plain dict this always yields False -- confirm.
    self.logger = getMSLogger(getattr(msConfig, 'verbose', False), kwargs.get("logger"))
    self.msConfig = msConfig
    self.logger.info("Configuration including default values:\n%s", self.msConfig)

    # Define all handles up front so that a skipped service reads as None
    # instead of raising AttributeError on first access.
    self.reqmgr2 = None
    self.reqmgrAux = None
    self.phedex = None
    self.rucio = None

    if not kwargs.get("skipReqMgr", False):
        self.reqmgr2 = ReqMgr(self.msConfig['reqmgr2Url'], logger=self.logger)
    if not kwargs.get("skipReqMgrAux", False):
        self.reqmgrAux = ReqMgrAux(self.msConfig['reqmgr2Url'],
                                   httpDict={'cacheduration': 1.0},
                                   logger=self.logger)
    if not kwargs.get("skipRucio", False):
        self.rucio = Rucio(acct=self.msConfig['rucioAccount'],
                           hostUrl=self.msConfig['rucioUrl'],
                           authUrl=self.msConfig['rucioAuthUrl'],
                           configDict={"logger": self.logger,
                                       "user_agent": "wmcore-microservices"})
def setUp(self):
    """
    Prepare the CouchDB-backed REST fixture.

    Loads the ReReco_RunBlockWhite request template, registers its
    ScramArch/CMSSWVersion pair in the auxiliary database and creates
    the ReqMgr client used by the tests.
    """
    self.setConfig(config)
    couchDBs = [(config.views.data.couch_reqmgr_db, "ReqMgr"),
                (config.views.data.couch_reqmgr_aux_db, None)]
    self.setCouchDBs(couchDBs)
    self.setSchemaModules([])
    RESTBaseUnitTestWithDBBackend.setUp(self)
    self.setFakeDN()

    # the template is located relative to a directory four levels above this module
    moduleDir = os.path.dirname(__file__)
    baseDir = os.path.normpath(os.path.join(moduleDir, '..', '..', '..', '..'))
    templateFile = os.path.join(baseDir,
                                'data/ReqMgr/requests/DMWM/ReReco_RunBlockWhite.json')
    with open(templateFile) as handle:
        template = json.load(handle)
    self.rerecoCreateArgs = template["createRequest"]
    self.rerecoAssignArgs = template["assignRequest"]

    # "software" document: maps the ScramArch to a list with the CMSSW release
    scramArch = self.rerecoCreateArgs["ScramArch"]
    softwareDoc = {"_id": "software"}
    softwareDoc[scramArch] = [self.rerecoCreateArgs["CMSSWVersion"]]
    insertDataToCouch(os.getenv("COUCHURL"),
                      config.views.data.couch_reqmgr_aux_db,
                      softwareDoc)

    self.reqSvc = ReqMgr(self.jsonSender["host"])
    self.reqSvc._noStale = True
    self.reqSvc['requests'].additionalHeaders = self.create_header
def __init__(self, msConfig, logger=None):
    """
    Provides setup for MSTransferor and MSMonitor classes

    Creates the logger and the ReqMgr, ReqMgrAux and PhEDEx service handles.

    :param msConfig: MS service configuration, read by key (e.g. 'reqmgr2Url')
    :param logger: logger object (optional)
    """
    self.logger = getMSLogger(getattr(msConfig, 'verbose', False), logger)
    self.msConfig = msConfig
    self.logger.info("Configuration including default values:\n%s", self.msConfig)
    self.reqmgr2 = ReqMgr(self.msConfig['reqmgr2Url'], logger=self.logger)
    self.reqmgrAux = ReqMgrAux(self.msConfig['reqmgr2Url'],
                               httpDict={'cacheduration': 1.0},
                               logger=self.logger)

    # hard code it to production DBS otherwise PhEDEx subscribe API fails to match TMDB data
    dbsUrl = "https://cmsweb.cern.ch/dbs/prod/global/DBSReader"
    # FIXME: we cannot use Rucio in write mode yet, so PhEDEx is used
    # regardless of whether Rucio is enabled (both code paths were identical).
    self.phedex = PhEDEx(httpDict={'cacheduration': 0.5},
                         dbsUrl=dbsUrl,
                         logger=self.logger)
def setUp(self):
    """
    Prepare the CouchDB-backed REST fixture.

    Loads the ReReco request template, registers its ScramArch/CMSSWVersion
    pair in the auxiliary database and creates the ReqMgr client used by
    the tests.
    """
    self.setConfig(config)
    self.setCouchDBs([(config.views.data.couch_reqmgr_db, "ReqMgr"),
                      (config.views.data.couch_reqmgr_aux_db, None)])
    self.setSchemaModules([])
    RESTBaseUnitTestWithDBBackend.setUp(self)
    self.setFakeDN()

    requestPath = os.path.join(getWMBASE(), "test", "data", "ReqMgr", "requests", "DMWM")
    # the context manager closes the template file even if parsing fails
    with open(os.path.join(requestPath, "ReReco.json"), 'r') as rerecoFile:
        rerecoArgs = json.load(rerecoFile)
    self.rerecoCreateArgs = rerecoArgs["createRequest"]
    self.rerecoAssignArgs = rerecoArgs["assignRequest"]

    # "software" document: maps the ScramArch to a list with the CMSSW release
    cmsswDoc = {"_id": "software"}
    cmsswDoc[self.rerecoCreateArgs["ScramArch"]] = []
    cmsswDoc[self.rerecoCreateArgs["ScramArch"]].append(
        self.rerecoCreateArgs["CMSSWVersion"])
    insertDataToCouch(os.getenv("COUCHURL"),
                      config.views.data.couch_reqmgr_aux_db, cmsswDoc)

    self.reqSvc = ReqMgr(self.jsonSender["host"])
    self.reqSvc._noStale = True
    self.reqSvc['requests'].additionalHeaders = self.create_header
def __init__(self, config):
    """
    Initialise class members

    Sets up the WMBS and DBS3Buffer DAO factories, the optional work queue,
    the request database writer(s) and the ReqMgr clients, and caches the
    ID of the "cleanout" job state.

    :param config: agent configuration; the JobCreator, TaskArchiver and
        AnalyticsDataCollector sections are read here
    """
    BaseWorkerThread.__init__(self)

    # current_thread() replaces the deprecated currentThread() alias
    myThread = threading.current_thread()
    self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                 logger=myThread.logger,
                                 dbinterface=myThread.dbi)
    self.dbsDaoFactory = DAOFactory(package="WMComponent.DBS3Buffer",
                                    logger=myThread.logger,
                                    dbinterface=myThread.dbi)

    self.config = config
    self.jobCacheDir = self.config.JobCreator.jobCacheDir

    # The explicit comparison is kept on purpose: only values equal to False
    # (False, 0) disable the queue; None or '' would still enable it.
    if getattr(self.config.TaskArchiver, "useWorkQueue", False) != False:
        # Get workqueue setup from config unless overridden
        if hasattr(self.config.TaskArchiver, 'WorkQueueParams'):
            self.workQueue = localQueue(**self.config.TaskArchiver.WorkQueueParams)
        else:
            from WMCore.WorkQueue.WorkQueueUtils import queueFromConfig
            self.workQueue = queueFromConfig(self.config)
    else:
        self.workQueue = None

    self.timeout = getattr(self.config.TaskArchiver, "timeOut", None)
    self.useReqMgrForCompletionCheck = getattr(
        self.config.TaskArchiver, 'useReqMgrForCompletionCheck', True)

    if not self.useReqMgrForCompletionCheck:
        # sets the local monitor summary couch db
        self.requestLocalCouchDB = RequestDBWriter(
            self.config.AnalyticsDataCollector.localT0RequestDBURL,
            couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)
        self.centralCouchDBWriter = self.requestLocalCouchDB
    else:
        self.centralCouchDBWriter = RequestDBWriter(
            self.config.AnalyticsDataCollector.centralRequestDBURL)
        self.reqmgr2Svc = ReqMgr(self.config.TaskArchiver.ReqMgr2ServiceURL)
        # TODO: remove this when reqmgr2 replace reqmgr completely (reqmgr2Only)
        self.reqmgrSvc = RequestManager(
            {'endpoint': self.config.TaskArchiver.ReqMgrServiceURL})

    # Load the cleanout state ID and save it
    stateIDDAO = self.daoFactory(classname="Jobs.GetStateID")
    self.stateID = stateIDDAO.execute("cleanout")
def __init__(self, queue, config):
    """
    Initialise class members

    :param queue: queue object stored on the instance
    :param config: configuration; TaskArchiver.ReqMgr2ServiceURL is read here
    """
    BaseWorkerThread.__init__(self)
    self.queue = queue
    self.config = config

    reqMgrUrl = self.config.TaskArchiver.ReqMgr2ServiceURL
    self.reqmgr2Svc = ReqMgr(reqMgrUrl)

    # state lists which shouldn't be populated in wmbs.
    # (To prevent creating work before WQE status updated)
    abortedCache = self.reqmgr2Svc.getAbortedAndForceCompleteRequestsFromMemoryCache()
    self.abortedAndForceCompleteWorkflowCache = abortedCache
def __init__(self, queue, config):
    """
    Initialise class members

    :param queue: queue object stored on the instance
    :param config: configuration; General.ReqMgr2ServiceURL is read here
    """
    BaseWorkerThread.__init__(self)
    self.queue = queue
    self.config = config
    self.reqmgr2Svc = ReqMgr(self.config.General.ReqMgr2ServiceURL)

    # current_thread() replaces the deprecated currentThread() alias
    myThread = threading.current_thread()
    daoFactory = DAOFactory(package="WMCore.WMBS",
                            logger=myThread.logger,
                            dbinterface=myThread.dbi)
    # DAO from the "Subscriptions.CountFinishedSubscriptionsByWorkflow" class
    self.finishedWorflowCheck = daoFactory(
        classname="Subscriptions.CountFinishedSubscriptionsByWorkflow")
def __init__(self, microConfig, uniConfig, logger=None):
    """
    Runs the basic setup and initialization for the MS Transferor module

    :param microConfig: microservice configuration, read by key
        ('verbose', 'reqmgrUrl' and 'dbsUrl' are used here)
    :param uniConfig: configuration object stored as-is on the instance
    :param logger: logger object (optional)
    """
    self.msConfig = microConfig
    self.uConfig = uniConfig
    self.reqRecords = []
    self.logger = getMSLogger(microConfig['verbose'], logger=logger)

    # both ReqMgr clients talk to the same endpoint
    reqmgrUrl = microConfig['reqmgrUrl']
    self.reqmgr2 = ReqMgr(reqmgrUrl, logger=self.logger)
    auxCache = {'cacheduration': 60}
    self.reqmgrAux = ReqMgrAux(reqmgrUrl, httpDict=auxCache, logger=self.logger)

    # eventually will change it to Rucio
    phedexCache = {'cacheduration': 10 * 60}
    self.phedex = PhEDEx(httpDict=phedexCache,
                         dbsUrl=microConfig['dbsUrl'],
                         logger=self.logger)
def setup(self, parameters): """ Called at startup """ # set the connection for local couchDB call self.useReqMgrForCompletionCheck = getattr( self.config.TaskArchiver, 'useReqMgrForCompletionCheck', True) self.archiveDelayHours = getattr(self.config.TaskArchiver, 'archiveDelayHours', 0) self.wmstatsCouchDB = WMStatsWriter( self.config.TaskArchiver.localWMStatsURL, "WMStatsAgent") #TODO: we might need to use local db for Tier0 self.centralRequestDBReader = RequestDBReader( self.config.AnalyticsDataCollector.centralRequestDBURL, couchapp=self.config.AnalyticsDataCollector.RequestCouchApp) if self.useReqMgrForCompletionCheck: self.deletableState = "announced" self.centralRequestDBWriter = RequestDBWriter( self.config.AnalyticsDataCollector.centralRequestDBURL, couchapp=self.config.AnalyticsDataCollector.RequestCouchApp) if self.config.TaskArchiver.reqmgr2Only: self.reqmgr2Svc = ReqMgr( self.config.TaskArchiver.ReqMgr2ServiceURL) else: #TODO: remove this for reqmgr2 self.reqmgrSvc = RequestManager( {'endpoint': self.config.TaskArchiver.ReqMgrServiceURL}) else: # Tier0 case self.deletableState = "completed" # use local for update self.centralRequestDBWriter = RequestDBWriter( self.config.AnalyticsDataCollector.localT0RequestDBURL, couchapp=self.config.AnalyticsDataCollector.RequestCouchApp) jobDBurl = sanitizeURL(self.config.JobStateMachine.couchurl)['url'] jobDBName = self.config.JobStateMachine.couchDBName self.jobCouchdb = CouchServer(jobDBurl) self.jobsdatabase = self.jobCouchdb.connectDatabase("%s/jobs" % jobDBName) self.fwjrdatabase = self.jobCouchdb.connectDatabase("%s/fwjrs" % jobDBName) statSummaryDBName = self.config.JobStateMachine.summaryStatsDBName self.statsumdatabase = self.jobCouchdb.connectDatabase( statSummaryDBName)
def __init__(self, **kwargs):
    """
    Set up the logger, the ReqMgr client and the LogDB handle.

    :param kwargs: optional settings read here:
        logger: logger object; the logging module is used when missing/falsy
        reqmgr2_endpoint: passed to ReqMgr (None when absent)
        central_logdb_url: passed to LogDB ("" when absent)
        log_reporter: LogDB identifier ("" when absent)
    """
    if not kwargs.get('logger'):
        import logging
        kwargs['logger'] = logging
    self.logger = kwargs['logger']

    # this will break all in one test
    self.reqMgr2 = ReqMgr(kwargs.get("reqmgr2_endpoint", None))

    centralurl = kwargs.get("central_logdb_url", "")
    identifier = kwargs.get("log_reporter", "")

    # set the thread name before creating the log db.
    # only sets that when it is not set already
    # (current_thread()/.name replace the deprecated currentThread()/getName()/setName())
    myThread = threading.current_thread()
    if myThread.name == "MainThread":
        myThread.name = self.__class__.__name__

    self.logdb = LogDB(centralurl, identifier, logger=self.logger)
    self.previous_state = {}
def __init__(self, msConfig, **kwargs):
    """
    Provides setup for MSTransferor and MSMonitor classes

    Every service handle (reqmgr2, reqmgrAux, phedex, rucio) is always
    defined on the instance; a service that is skipped is left as None.
    Rucio is created when 'useRucio' is set in the configuration and it is
    not skipped; otherwise PhEDEx is created unless it is skipped.

    :param msConfig: MS service configuration, read by key (e.g. 'reqmgr2Url')
    :param kwargs: can be used to skip the initialization of specific services, such as:
        logger: logger object
        skipReqMgr: boolean to skip ReqMgr initialization
        skipReqMgrAux: boolean to skip ReqMgrAux initialization
        skipRucio: boolean to skip Rucio initialization
        skipPhEDEx: boolean to skip PhEDEx initialization
    """
    # NOTE(review): 'verbose' is read as an attribute while all other settings
    # are read by key; for a plain dict this always yields False -- confirm.
    self.logger = getMSLogger(getattr(msConfig, 'verbose', False), kwargs.get("logger"))
    self.msConfig = msConfig
    self.logger.info("Configuration including default values:\n%s", self.msConfig)

    # Define all handles up front so that a skipped service reads as None
    # instead of raising AttributeError on first access.
    self.reqmgr2 = None
    self.reqmgrAux = None
    self.phedex = None
    self.rucio = None

    if not kwargs.get("skipReqMgr", False):
        self.reqmgr2 = ReqMgr(self.msConfig['reqmgr2Url'], logger=self.logger)
    if not kwargs.get("skipReqMgrAux", False):
        self.reqmgrAux = ReqMgrAux(self.msConfig['reqmgr2Url'],
                                   httpDict={'cacheduration': 1.0},
                                   logger=self.logger)

    if self.msConfig.get('useRucio', False) and not kwargs.get("skipRucio", False):
        self.rucio = Rucio(acct=self.msConfig['rucioAccount'],
                           hostUrl=self.msConfig['rucioUrl'],
                           authUrl=self.msConfig['rucioAuthUrl'],
                           configDict={
                               "logger": self.logger,
                               "user_agent": "wmcore-microservices"
                           })
    elif not kwargs.get("skipPhEDEx", False):
        # hard code it to production DBS otherwise PhEDEx subscribe API fails to match TMDB data
        dbsUrl = "https://cmsweb.cern.ch/dbs/prod/global/DBSReader"
        self.phedex = PhEDEx(httpDict={'cacheduration': 0.5},
                             dbsUrl=dbsUrl,
                             logger=self.logger)
def advanceStatus(self, config):
    """
    Move requests forward according to the status of their global
    workqueue elements.

    :param config: object providing reqmgr2_url and workqueue_url, and
        optionally enableMSStatusTransition
    """
    reqMgr = ReqMgr(config.reqmgr2_url, logger=self.logger)
    globalQueue = WorkQueue(config.workqueue_url)

    self.logger.info("Getting GQ data for status check")
    statusByWorkflow = globalQueue.getWorkflowStatusFromWQE()

    self.logger.info("Advancing statuses")
    # the transferor transition only runs when explicitly enabled
    msTransitionEnabled = getattr(config, "enableMSStatusTransition", False)
    if msTransitionEnabled:
        moveTransferorStatus(reqMgr, self.logger)
    moveForwardStatus(reqMgr, statusByWorkflow, self.logger)
    moveToCompletedForNoWQJobs(reqMgr, statusByWorkflow, self.logger)

    self.logger.info("Done advancing status")
def __init__(self, config=None, logger=None):
    """
    Set up the service:
      * create the logger
      * parse the configuration (via _parseConfig)
      * create the ReqMgr, ReqMgrAux and MSTransferor helpers
      * start the transferor and monitor threads

    :param config: reqmgr2ms service configuration
    :param logger: logger object (optional)
    """
    self.uConfig = {}
    self.config = config
    verbose = getattr(config, 'verbose', False)
    self.logger = getMSLogger(verbose, logger)
    self._parseConfig(config)
    self.logger.info("Configuration including default values:\n%s", self.msConfig)

    self.reqmgr2 = ReqMgr(self.msConfig['reqmgrUrl'], logger=self.logger)
    self.reqmgrAux = ReqMgrAux(self.msConfig['reqmgrUrl'],
                               httpDict={'cacheduration': 60},
                               logger=self.logger)

    # transferor has to look at workflows in assigned status
    self.msTransferor = MSTransferor(self.msConfig, "assigned", logger=self.logger)

    ### Last but not least, get the threads started
    transfName = 'MSTransferor'
    transfArgs = (self.transferor, 'assigned', self.msConfig['interval'], self.logger)
    self.transfThread = start_new_thread(transfName, daemon, transfArgs)
    self.logger.debug("### Running %s thread %s", transfName,
                      self.transfThread.running())

    # the monitor thread is given twice the transferor interval
    monitName = 'MSTransferorMonit'
    monitArgs = (self.monitor, 'staging', self.msConfig['interval'] * 2, self.logger)
    self.monitThread = start_new_thread(monitName, daemon, monitArgs)
    self.logger.debug("+++ Running %s thread %s", monitName,
                      self.monitThread.running())
def __init__(self, config):
    """
    __init__

    Creates the BossAir, ReqMgr and WorkQueue handles and the WMBS DAOs
    used by this worker.

    :param config: agent configuration; the General and WorkQueueManager
        sections are read here
    """
    BaseWorkerThread.__init__(self)
    self.config = config

    self.bossAir = BossAirAPI(config=self.config)
    self.reqmgr2 = ReqMgr(self.config.General.ReqMgr2ServiceURL)
    self.workqueue = WorkQueue(self.config.WorkQueueManager.couchurl,
                               self.config.WorkQueueManager.dbname)

    # current_thread() replaces the deprecated currentThread() alias
    myThread = threading.current_thread()
    self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                 logger=myThread.logger,
                                 dbinterface=myThread.dbi)

    self.listWorkflowsDAO = self.daoFactory(classname="Workflow.ListForJobUpdater")
    self.updateWorkflowPrioDAO = self.daoFactory(classname="Workflow.UpdatePriority")
    self.executingJobsDAO = self.daoFactory(classname="Jobs.GetNumberOfJobsForWorkflowTaskStatus")
def advanceStatus(self, config):
    """
    Move requests forward according to the status of their global
    workqueue elements, then run the archival step.

    :param config: object providing reqmgr2_url, workqueue_url,
        wmstats_url and archiveDelayHours
    """
    reqMgr = ReqMgr(config.reqmgr2_url, logger=self.logger)
    globalQueue = WorkQueue(config.workqueue_url)
    wmstats = WMStatsServer(config.wmstats_url, logger=self.logger)

    self.logger.info("Getting GQ data for status check")
    statusByWorkflow = globalQueue.getWorkflowStatusFromWQE()

    self.logger.info("Advancing status")
    moveForwardStatus(reqMgr, statusByWorkflow, self.logger)
    moveToCompletedForNoWQJobs(reqMgr, statusByWorkflow, self.logger)
    moveToArchived(wmstats, reqMgr, config.archiveDelayHours, self.logger)

    self.logger.info("Done advancing status")
def advanceStatus(self, config):
    """
    Move requests forward according to the status of their global
    workqueue elements, then run the archival step.

    :param config: object providing reqmgr2_url, workqueue_url, wmstats_url,
        central_logdb_url, log_reporter and archiveDelayHours
    """
    reqMgr = ReqMgr(config.reqmgr2_url, logger=self.logger)
    globalQueue = WorkQueue(config.workqueue_url)
    wmstats = WMStatsServer(config.wmstats_url, logger=self.logger)
    logDB = LogDB(config.central_logdb_url, config.log_reporter)

    self.logger.info("Getting GQ data for status check")
    statusByWorkflow = globalQueue.getWorkflowStatusFromWQE()

    self.logger.info("Advancing status")
    moveForwardStatus(reqMgr, statusByWorkflow, self.logger)
    moveToCompletedForNoWQJobs(reqMgr, statusByWorkflow, self.logger)
    moveToArchived(wmstats, reqMgr, logDB, config.archiveDelayHours, self.logger)

    self.logger.info("Done advancing status")
def main():
    """
    _main_

    Loads the agent configuration, connects to ReqMgr2 and the local
    workqueue, and collects the workflows whose workqueue priority differs
    from the RequestPriority reported by ReqMgr2.

    The print calls and the except clause use syntax that is valid on both
    Python 2 and Python 3.
    """
    # fall back to the default agent configuration only when none is set
    os.environ.setdefault('WMAGENT_CONFIG',
                          '/data/srv/wmagent/current/config/wmagent/config.py')

    config = loadConfigurationFile(os.environ["WMAGENT_CONFIG"])

    # Instantiating central reqmgr and local workqueue
    print("ReqMgr2 URL : %s" % sanitizeURL(config.JobUpdater.reqMgr2Url)['url'])
    print("WorkQueue URL: %s and dbname %s" % (
        sanitizeURL(config.WorkQueueManager.couchurl)['url'],
        config.WorkQueueManager.dbname))

    reqmgr2 = ReqMgr(config.JobUpdater.reqMgr2Url)
    workqueue = WorkQueue(config.WorkQueueManager.couchurl,
                          config.WorkQueueManager.dbname)

    print("\nFirst attempt to update prio of wfs that are not in WMBS and only in local queue")
    priorityCache = {}      # workflow name -> RequestPriority fetched from ReqMgr2
    workflowsToUpdate = {}  # workflow name -> priority that should be applied
    workflowsToCheck = list(workqueue.getAvailableWorkflows())
    print("Retrieved %d workflows from workqueue" % len(workflowsToCheck))

    for workflow, priority in workflowsToCheck:
        if workflow not in priorityCache:
            try:
                priorityCache[workflow] = reqmgr2.getRequestByNames(
                    workflow)[workflow]['RequestPriority']
            except Exception as ex:
                # best effort: report and move on to the next workflow
                print("Couldn't retrieve the priority of request %s" % workflow)
                print("Error: %s" % ex)
                continue
        if priority != priorityCache[workflow]:
            workflowsToUpdate[workflow] = priorityCache[workflow]
def __init__(self, config):
    """
    Set up the job submitter worker.

    Creates the WMBS DAO factory and DAOs, the helper libraries, the
    submission limits read from the JobSubmitter/BossAir sections, the
    in-memory caches and the package directory.

    :param config: agent configuration object
    :raises JobSubmitterPollerException: when the package directory cannot
        be created
    """
    BaseWorkerThread.__init__(self)
    # current_thread() replaces the deprecated currentThread() alias
    myThread = threading.current_thread()
    self.config = config

    # DAO factory for WMBS objects
    self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                 logger=logging,
                                 dbinterface=myThread.dbi)

    # Libraries
    self.resourceControl = ResourceControl()
    self.changeState = ChangeState(self.config)
    self.bossAir = BossAirAPI(config=self.config)

    self.hostName = self.config.Agent.hostName
    self.repollCount = getattr(self.config.JobSubmitter, 'repollCount', 10000)
    self.maxJobsPerPoll = int(
        getattr(self.config.JobSubmitter, 'maxJobsPerPoll', 1000))
    self.maxJobsThisCycle = self.maxJobsPerPoll  # changes as per schedd limit
    self.cacheRefreshSize = int(
        getattr(self.config.JobSubmitter, 'cacheRefreshSize', 30000))
    self.skipRefreshCount = int(
        getattr(self.config.JobSubmitter, 'skipRefreshCount', 20))
    self.packageSize = getattr(self.config.JobSubmitter, 'packageSize', 500)
    self.collSize = getattr(self.config.JobSubmitter, 'collectionSize',
                            self.packageSize * 1000)
    self.maxTaskPriority = getattr(self.config.BossAir, 'maxTaskPriority', 1e7)
    self.condorFraction = 0.75  # update during every algorithm cycle
    self.condorOverflowFraction = 0.2
    self.ioboundTypes = ('LogCollect', 'Merge', 'Cleanup', 'Harvesting')

    # Additions for caching-based JobSubmitter
    self.cachedJobIDs = set()
    self.cachedJobs = {}
    self.jobDataCache = {}
    self.jobsToPackage = {}
    self.sandboxPackage = {}
    self.locationDict = {}
    self.taskTypePrioMap = {}
    self.drainSites = set()
    self.abortSites = set()
    self.refreshPollingCount = 0

    try:
        # submitDir defaults to the component directory when unset/empty
        if not getattr(self.config.JobSubmitter, 'submitDir', None):
            self.config.JobSubmitter.submitDir = self.config.JobSubmitter.componentDir
        self.packageDir = os.path.join(self.config.JobSubmitter.submitDir,
                                       'packages')

        if not os.path.exists(self.packageDir):
            os.makedirs(self.packageDir)
    except OSError as ex:
        # the directory name is interpolated here; previously the '%s'
        # placeholder was emitted verbatim
        msg = "Error while trying to create packageDir %s\n!" % self.packageDir
        msg += str(ex)
        logging.error(msg)
        logging.debug("PackageDir: %s", self.packageDir)
        logging.debug("Config: %s", config)
        raise JobSubmitterPollerException(msg)

    # Now the DAOs
    self.listJobsAction = self.daoFactory(classname="Jobs.ListForSubmitter")
    self.setLocationAction = self.daoFactory(classname="Jobs.SetLocation")
    self.locationAction = self.daoFactory(classname="Locations.GetSiteInfo")
    self.setFWJRPathAction = self.daoFactory(classname="Jobs.SetFWJRPath")
    self.listWorkflows = self.daoFactory(classname="Workflow.ListForSubmitter")

    # Keep a record of the thresholds in memory
    self.currentRcThresholds = {}

    self.useReqMgrForCompletionCheck = getattr(
        self.config.TaskArchiver, 'useReqMgrForCompletionCheck', True)

    if self.useReqMgrForCompletionCheck:
        # only set up this when reqmgr is used (not Tier0)
        self.reqmgr2Svc = ReqMgr(self.config.General.ReqMgr2ServiceURL)
        self.abortedAndForceCompleteWorkflowCache = \
            self.reqmgr2Svc.getAbortedAndForceCompleteRequestsFromMemoryCache()
        self.reqAuxDB = ReqMgrAux(self.config.General.ReqMgr2ServiceURL)
    else:
        # Tier0 case - just for clarity (this private variable shouldn't be used)
        self.abortedAndForceCompleteWorkflowCache = None
def __init__(self, app, config, mount):
    """
    Set up the web front-end: template page, static-content directories,
    data-ops scripts, CherryPy settings, the REST API hub with its admin
    helpers, the ReqMgr client and the fields used by the templates.

    :param app: rebound to a new RESTMain instance before first use
    :param config: configuration object; `base`, `reqmgr` and
        `dictionary_()` are read here
    :param mount: rebound to '/rest' before first use
    """
    self.base = config.base
    self.rootdir = '/'.join(WMCore.__file__.split('/')[:-1])
    # NOTE(review): web_config is bound only when config is a truthy non-dict
    # or is falsy; a non-empty dict would leave it unbound -- confirm callers.
    if config and not isinstance(config, dict):
        web_config = config.dictionary_()
    if not config:
        web_config = {'base': self.base}
    TemplatedPage.__init__(self, web_config)

    # static content directories: environment variable (or <cwd>/<name>) is
    # the default, an entry in web_config takes precedence
    imgdir = os.environ.get('RM_IMAGESPATH', os.getcwd() + '/images')
    self.imgdir = web_config.get('imgdir', imgdir)
    cssdir = os.environ.get('RM_CSSPATH', os.getcwd() + '/css')
    self.cssdir = web_config.get('cssdir', cssdir)
    jsdir = os.environ.get('RM_JSPATH', os.getcwd() + '/js')
    self.jsdir = web_config.get('jsdir', jsdir)
    spdir = os.environ.get('RM_SPECPATH', os.getcwd() + '/specs')
    self.spdir = web_config.get('spdir', spdir)

    # read scripts area and initialize data-ops scripts
    self.sdir = os.environ.get('RM_SCRIPTS', os.getcwd() + '/scripts')
    self.sdir = web_config.get('sdir', self.sdir)
    self.sdict_thr = web_config.get('sdict_thr', 600)  # put reasonable 10 min interval
    self.sdict = {'ts': time.time()}  # placeholder for data-ops scripts
    self.update_scripts(force=True)

    # To be filled at run time
    self.cssmap = {}
    self.jsmap = {}
    self.imgmap = {}
    self.yuimap = {}

    std_specs_dir = os.path.join(self.rootdir, 'WMSpec/StdSpecs')
    self.std_specs = spec_list(std_specs_dir, 'WMSpec.StdSpecs')
    self.std_specs.sort()

    # Update CherryPy configuration
    mime_types = ['text/css']
    mime_types += [
        'application/javascript', 'text/javascript',
        'application/x-javascript', 'text/x-javascript'
    ]
    cherryconf.update({
        'tools.encode.on': True,
        'tools.gzip.on': True,
        'tools.gzip.mime_types': mime_types,
    })
    self._cache = {}

    # initialize rest API
    statedir = '/tmp'
    app = RESTMain(config, statedir)  # REST application
    mount = '/rest'  # mount point for cherrypy service
    api = RestApiHub(app, config.reqmgr, mount)

    # initialize access to reqmgr2 APIs
    self.reqmgr = ReqMgr(config.reqmgr.reqmgr2_url)
    # only gets the current view (this might make the response time much longer;
    # if an up-to-date view is not needed, overwrite it with False)
    self.reqmgr._noStale = True

    # admin helpers
    self.admin_info = Info(app, api, config.reqmgr, mount=mount + '/info')
    self.admin_group = Group(app, api, config.reqmgr, mount=mount + '/group')
    self.admin_team = Team(app, api, config.reqmgr, mount=mount + '/team')

    # get fields which we'll use in templates
    cdict = config.reqmgr.dictionary_()
    self.couch_url = cdict.get('couch_host', '')
    self.couch_dbname = cdict.get('couch_reqmgr_db', '')
    self.couch_wdbname = cdict.get('couch_workload_summary_db', '')
    self.acdc_url = cdict.get('acdc_host', '')
    self.acdc_dbname = cdict.get('acdc_db', '')
    # the config cache URL falls back to the couch host
    self.configcache_url = cdict.get('couch_config_cache_url', self.couch_url)
    self.dbs_url = cdict.get('dbs_url', '')
    self.dqm_url = cdict.get('dqm_url', '')
    self.sw_ver = cdict.get('default_sw_version', 'CMSSW_5_2_5')
    self.sw_arch = cdict.get('default_sw_scramarch', 'slc5_amd64_gcc434')
def __init__(self, app, config, mount):
    """
    Set up the web front-end: template page, static-content directories,
    data-ops scripts, CherryPy settings, the ReqMgr client, the LogDB
    handle, the wmstats URL and the default assignment arguments.

    :param app: not used in this method
    :param config: configuration object; `base`, `reqmgr` and
        `dictionary_()` are read here
    :param mount: not used in this method
    :raises Exception: when the resulting wmstats URL is empty
    """
    self.base = config.base
    self.rootdir = '/'.join(WMCore.__file__.split('/')[:-1])
    # NOTE(review): web_config is bound only when config is a truthy non-dict
    # or is falsy; a non-empty dict would leave it unbound -- confirm callers.
    if config and not isinstance(config, dict):
        web_config = config.dictionary_()
    if not config:
        web_config = {'base': self.base}
    TemplatedPage.__init__(self, web_config)

    # static content directories: environment variable (or <cwd>/<name>) is
    # the default, an entry in web_config takes precedence
    imgdir = os.environ.get('RM_IMAGESPATH', os.getcwd() + '/images')
    self.imgdir = web_config.get('imgdir', imgdir)
    cssdir = os.environ.get('RM_CSSPATH', os.getcwd() + '/css')
    self.cssdir = web_config.get('cssdir', cssdir)
    jsdir = os.environ.get('RM_JSPATH', os.getcwd() + '/js')
    self.jsdir = web_config.get('jsdir', jsdir)
    spdir = os.environ.get('RM_SPECPATH', os.getcwd() + '/specs')
    self.spdir = web_config.get('spdir', spdir)

    # read scripts area and initialize data-ops scripts
    self.sdir = os.environ.get('RM_SCRIPTS', os.getcwd() + '/scripts')
    self.sdir = web_config.get('sdir', self.sdir)
    self.sdict_thr = web_config.get('sdict_thr', 600)  # put reasonable 10 min interval
    self.sdict = {'ts': time.time()}  # placeholder for data-ops scripts
    self.update_scripts(force=True)

    # To be filled at run time
    self.cssmap = {}
    self.jsmap = {}
    self.imgmap = {}
    self.yuimap = {}

    std_specs_dir = os.path.join(self.rootdir, 'WMSpec/StdSpecs')
    self.std_specs = spec_list(std_specs_dir)
    self.std_specs.sort()

    # Update CherryPy configuration
    mime_types = ['text/css']
    mime_types += [
        'application/javascript', 'text/javascript',
        'application/x-javascript', 'text/x-javascript'
    ]
    cherryconf.update({
        'tools.encode.on': True,
        'tools.gzip.on': True,
        'tools.gzip.mime_types': mime_types,
    })
    self._cache = {}

    # initialize access to reqmgr2 APIs
    self.reqmgr_url = config.reqmgr.reqmgr2_url
    self.reqmgr = ReqMgr(self.reqmgr_url)
    # only gets the current view (this might make the response time much longer;
    # if an up-to-date view is not needed, overwrite it with False)
    self.reqmgr._noStale = True

    # get fields which we'll use in templates
    cdict = config.reqmgr.dictionary_()
    self.couch_url = cdict.get('couch_host', '')
    self.couch_dbname = cdict.get('couch_reqmgr_db', '')
    self.couch_wdbname = cdict.get('couch_workload_summary_db', '')
    self.acdc_url = cdict.get('acdc_host', '')
    self.acdc_dbname = cdict.get('acdc_db', '')
    # the config cache URL falls back to the couch host
    self.configcache_url = cdict.get('couch_config_cache_url', self.couch_url)
    self.dbs_url = cdict.get('dbs_url', '')
    self.dqm_url = cdict.get('dqm_url', '')
    self.sw_ver = cdict.get('default_sw_version', 'CMSSW_7_6_1')
    self.sw_arch = cdict.get('default_sw_scramarch', 'slc6_amd64_gcc493')

    # LogDB holder
    centralurl = cdict.get("central_logdb_url", "")
    identifier = cdict.get("log_reporter", "reqmgr2")
    self.logdb = LogDB(centralurl, identifier)

    # local team cache which will request data from wmstats
    # default wmstats URL: scheme and host of the reqmgr2 URL + '/wmstatsserver'
    base, uri = self.reqmgr_url.split('://')
    base_url = '%s://%s' % (base, uri.split('/')[0])
    self.wmstatsurl = cdict.get('wmstats_url', '%s/wmstatsserver' % base_url)
    if not self.wmstatsurl:
        raise Exception(
            'ReqMgr2 configuration file does not provide wmstats url')
    self.team_cache = []

    # fetch assignment arguments specification from StdBase
    self.assignArgs = StdBase().getWorkloadAssignArgs()
    # keep only the 'default' value of every argument
    self.assignArgs = {
        key: val['default']
        for key, val in self.assignArgs.items()
    }
class RequestLifeCycleBase_t(object):
    """
    Base class for request life-cycle tests.

    State (request, request name, workqueue) is stored on the class so the
    numbered test methods can share it. The test methods read
    `requestParams` from the class; it is not defined here.

    Retry loops catch `Exception` rather than using a bare `except`, so
    KeyboardInterrupt and SystemExit are no longer swallowed.
    """

    request = None
    request_name = None
    workqueue = None
    endpoint = os.environ.get('REQMGRBASEURL', 'https://localhost:8443')
    reqmgr = ReqMgr(endpoint + '/reqmgr2')
    team = 'TestTeam'
    _failure_detected = False

    @recordException
    def setUp(self):
        """Skip after an earlier failure; otherwise ping reqmgr (up to 3 tries)."""
        if self.__class__._failure_detected:
            raise nose.SkipTest
        # simple ping check - check reqmgr up
        tries = 0
        while True:
            try:
                if not self.__class__.request:
                    self.__class__.reqmgr.getTeam()
                break
            except Exception:
                tries += 1
                if tries >= 3:
                    raise nose.SkipTest("Unable to contact reqmgr")
                time.sleep(15)

    def _configCacheId(self, label):
        """Return config cache id for given config label"""
        key, cert = self.__class__.reqmgr['requests'].getKeyCert()
        configCache = ConfigCache(self.__class__.endpoint + '/couchdb',
                                  'reqmgr_config_cache',
                                  ckey=key,
                                  cert=cert)
        try:
            configCacheId = configCache.getIDFromLabel(label)
        except Exception:
            # a failed lookup is treated as "no such config yet"
            configCacheId = None
        if configCacheId:
            return configCacheId
        # The following will fail if FWCore.ParameterSet not in PYTHONPATH
        from PSetTweaks.WMTweak import makeTweak
        configCache.createUserGroup('test', 'test')
        configDir = os.path.join(getTestBase(), '..', '..', 'test', 'data',
                                 'configs')
        configCache.addConfig(os.path.join(configDir, label + '.py'))
        configCache.setLabel(label)
        configCache.setDescription(label)
        modPath = imp.find_module(label, [configDir])
        loadedConfig = imp.load_module(label, modPath[0], modPath[1],
                                       modPath[2])
        configCache.setPSetTweaks(
            makeTweak(loadedConfig.process).jsondictionary())
        configCache.save()
        return configCache.getIDFromLabel(label)

    def _convertLabelsToId(self, config):
        """Replace config labels by config cache ids, recursing into Task1..Task4."""
        fields = [
            'ProcConfigCacheID', 'Skim1ConfigCacheID', 'StepOneConfigCacheID',
            'ConfigCacheID'
        ]
        for field in fields:
            if config.get(field):
                config[field] = self._configCacheId(config[field])
        for field in ['Task1', 'Task2', 'Task3', 'Task4']:
            if config.get(field):
                config[field] = self._convertLabelsToId(config[field])
        return config

    @attr("lifecycle")
    @recordException
    def test05InjectConfigs(self):
        """Inject configs to cache"""
        self.__class__.requestParams = self._convertLabelsToId(
            self.__class__.requestParams)

    @attr("lifecycle")
    @recordException
    def test10InjectRequest(self):
        """Can inject a request"""
        self.__class__.requestParams.setdefault('RequestString',
                                                self.__class__.__name__)
        tries = 0
        while True:
            try:
                self.__class__.request = self.__class__.reqmgr.makeRequest(
                    **self.__class__.requestParams
                )['WMCore.ReqMgr.DataStructs.Request']
                self.__class__.request_name = self.__class__.request[
                    'RequestName']
                break
            except Exception:
                # retry a few times, then propagate the last error
                tries += 1
                if tries > 3:
                    raise
        self.assertTrue(self.__class__.request)
        self.assertTrue(self.__class__.request_name)
        print("Injected request %s" % self.__class__.request_name)
        self.__class__.request = self.__class__.reqmgr.getRequest(
            self.__class__.request_name)
        self.assertEqual(self.__class__.request['RequestStatus'], 'new')

    @attr("lifecycle")
    @recordException
    def test20ApproveRequest(self):
        """Approve request"""
        self.__class__.reqmgr.reportRequestStatus(self.__class__.request_name,
                                                  'assignment-approved')
        self.__class__.request = self.__class__.reqmgr.getRequest(
            self.__class__.request_name)
        self.assertEqual(self.__class__.request['RequestStatus'],
                         'assignment-approved')

    # @attr("lifecycle")
    @recordException
    def test30AssignRequest(self):
        """Assign request"""
        self.__class__.reqmgr.assign(self.__class__.request_name,
                                     self.__class__.team,
                                     "Testing",
                                     "v1",
                                     MergedLFNBase='/store/temp',
                                     UnmergedLFNBase='/store/temp')
        self.__class__.request = self.reqmgr.getRequest(
            self.__class__.request_name)
        self.assertEqual(self.__class__.request['RequestStatus'], 'assigned')

    @attr("lifecycle")
    @recordException
    def test40WorkQueueAcquires(self):
        """WorkQueue picks up request"""
        if not self.__class__.request_name:
            raise nose.SkipTest
        start = time.time()
        while True:
            workqueue = self.reqmgr.getWorkQueue(
                request=self.__class__.request_name)
            if workqueue:
                self.__class__.workqueue = WorkQueue(workqueue[0])
                self.__class__.request = self.__class__.reqmgr.getRequest(
                    self.__class__.request_name)
                self.assertTrue(
                    self.__class__.request['RequestStatus'] in ('acquired',
                                                                'running'))
                # NOTE(review): elements are compared to the request name and
                # then indexed by 'status' -- confirm the element type.
                request = [
                    x for x in
                    self.__class__.workqueue.getElementsCountAndJobsByWorkflow()
                    if x == self.__class__.request_name
                ]
                if [
                        x for x in request
                        if x['status'] in ('Available', 'Negotiating',
                                           'Acquired', 'Running')
                ]:
                    break
            # give up after 20 minutes
            if start + (60 * 20) < time.time():
                raise RuntimeError('timeout waiting for workqueue to acquire')
            time.sleep(15)

    @attr("lifecycle")
    @recordException
    def test50AgentAcquires(self):
        """Elements acquired by agent"""
        # skip if request already running
        self.__class__.request = self.__class__.reqmgr.getRequest(
            self.__class__.request_name)
        if self.__class__.request['RequestStatus'] == 'running':
            raise nose.SkipTest
        start = time.time()
        while True:
            request = [
                x for x in
                self.__class__.workqueue.getElementsCountAndJobsByWorkflow()
                if x == self.__class__.request_name
            ]
            if [x for x in request if x['status'] in ('Acquired', 'Running')]:
                break
            # give up after 20 minutes
            if start + (60 * 20) < time.time():
                raise RuntimeError('timeout waiting for agent to acquire')
            time.sleep(15)
        self.assertTrue(
            [x for x in request if x['status'] in ('Acquired', 'Running')])

    @attr("lifecycle")
    @recordException
    def test60RequestRunning(self):
        """Request running"""
        start = time.time()
        while True:
            request = [
                x for x in
                self.__class__.workqueue.getElementsCountAndJobsByWorkflow()
                if x == self.__class__.request_name
            ]
            childQueue = [
                x for x in self.__class__.workqueue.getChildQueuesByRequest()
                if x['request_name'] == self.__class__.request_name
            ]
            if request and 'Running' in [x['status'] for x in request]:
                self.assertTrue(childQueue,
                                "Running but can't get child queue")
                break
            # give up after 20 minutes
            if start + (60 * 20) < time.time():
                raise RuntimeError('timeout waiting for request to run')
            time.sleep(15)

    @attr("lifecycle")
    @recordException
    def test70WorkQueueFinished(self):
        """Request completed in workqueue"""
        start = time.time()
        while True:
            request = [
                x for x in
                self.__class__.workqueue.getElementsCountAndJobsByWorkflow()
                if x == self.__class__.request_name
            ]
            # request deleted from wq shortly after finishing, so may not appear here
            if not request or request == [
                    x for x in request
                    if x['status'] in ('Done', 'Failed', 'Canceled')
            ]:
                break
            # give up after 20 minutes
            if start + (60 * 20) < time.time():
                raise RuntimeError('timeout waiting for request to finish')
            time.sleep(15)

    @attr("lifecycle")
    @recordException
    def test80RequestFinished(self):
        """Request completed"""
        start = time.time()
        while True:
            self.__class__.request = self.__class__.reqmgr.getRequest(
                self.__class__.request_name)
            if self.__class__.request['RequestStatus'] in ('completed',
                                                           'failed',
                                                           'aborted'):
                break
            # give up after 20 minutes
            if start + (60 * 20) < time.time():
                raise RuntimeError('timeout waiting for request to finish')
            time.sleep(15)

    @attr("lifecycle")
    @recordException
    def test90RequestCloseOut(self):
        """Closeout request"""
        self.reqmgr.reportRequestStatus(self.__class__.request_name,
                                        "closed-out")
        self.__class__.request = self.__class__.reqmgr.getRequest(
            self.__class__.request_name)
        self.assertEqual('closed-out', self.__class__.request['RequestStatus'])