def setUp(self):
    """
    _setUp_

    Setup the database and logging connection.
    """
    logging.basicConfig()
    self.logger = logging.getLogger('LogDBTest')
    # self.logger.setLevel(logging.DEBUG)
    self.schema = []
    self.couchApps = ["LogDB"]
    self.testInit = TestInitCouchApp('LogDBTest')
    self.testInit.setLogging()
    self.testInit.setDatabaseConnection()
    self.testInit.setSchema(customModules=self.schema, useDefault=False)
    dbName = 'logdb_t'
    self.testInit.setupCouch(dbName, *self.couchApps)
    url = "%s/%s" % (self.testInit.couchUrl, dbName)
    identifier = 'agentname'
    self.agent_inst = LogDB(url, identifier, logger=self.logger, thread_name="MainThread")
    self.agent_inst2 = LogDB(url, identifier, logger=self.logger, thread_name="Test")
    identifier = '/DC=org/DC=doegrids/OU=People/CN=First Last 123'
    self.user_inst = LogDB(url, identifier, logger=self.logger, thread_name="MainThread")
    self.report = LogDBReport(self.agent_inst)
def logDBCleanUp(self, config):
    """
    Clean up old documents from the central LogDB instance.
    """
    logdb = LogDB(config.central_logdb_url, config.log_reporter)
    logdb.cleanup(config.logDBCleanDuration)
    cherrypy.log("cleaned up log db")
    return
def logDBCleanUp(self, config):
    """
    Clean up old documents from the central LogDB instance.
    """
    logdb = LogDB(config.central_logdb_url, config.log_reporter)
    logdb.cleanup(config.logDBCleanDuration)
    self.logger.info("cleaned up log db")
    return
def setUp(self):
    """
    _setUp_

    Setup the database and logging connection.
    """
    logging.basicConfig()
    self.logger = logging.getLogger('LogDBTest')
    # self.logger.setLevel(logging.DEBUG)
    url = 'http://localhost:5984/logdb_t'
    identifier = 'agentname'
    self.agent_inst = LogDB(url, identifier, logger=self.logger, create=True)
    self.agent_inst2 = LogDB(url, identifier, logger=self.logger, create=True, thread_name="Test")
    identifier = '/DC=org/DC=doegrids/OU=People/CN=First Last 123'
    self.user_inst = LogDB(url, identifier, logger=self.logger, create=True)
    self.report = LogDBReport(self.agent_inst)
def setUpLogDB(self, config):
    if hasattr(config, "central_logdb_url"):
        # default setup; the logDB config section needs to contain proper values
        from WMCore.Services.LogDB.LogDB import LogDB
        self.logDB = LogDB(config.central_logdb_url, config.log_reporter,
                           thread_name=config.object.rsplit(".", 1)[-1])
    else:
        self.logDB = None
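The setup helpers in these snippets only rely on a handful of configuration attributes. A minimal, hypothetical stand-in for that configuration is sketched below; the attribute names (central_logdb_url, log_reporter, object) come from the snippet above, while the values and the dotted module name are made up for illustration.

# Hypothetical configuration stand-in; only the attribute names are taken
# from the snippet above, the values are illustrative.
from types import SimpleNamespace

config = SimpleNamespace(
    central_logdb_url="http://localhost:5984/logdb_t",   # made-up CouchDB URL
    log_reporter="reqmgr2",                               # identifier stamped on every LogDB document
    object="SomePackage.LogDBTasks",                      # made-up dotted name; its last part becomes thread_name
)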
def __init__(self, **kwargs):
    if not kwargs.get('logger'):
        import logging
        kwargs['logger'] = logging
    self.logger = kwargs['logger']
    # this will break all in one test
    self.reqMgr2 = ReqMgr(kwargs.get("reqmgr2_endpoint", None))
    centralurl = kwargs.get("central_logdb_url", "")
    identifier = kwargs.get("log_reporter", "")
    # set the thread name before creating the LogDB client;
    # only set it when it is not set already
    myThread = threading.currentThread()
    if myThread.getName() == "MainThread":
        myThread.setName(self.__class__.__name__)
    self.logdb = LogDB(centralurl, identifier, logger=self.logger)
    self.previous_state = {}
def setUpLogDB(self, myThread):
    # setup logDB
    if hasattr(self.component.config, "General") and \
            hasattr(self.component.config.General, "central_logdb_url") and \
            hasattr(self.component.config, "Agent"):
        from WMCore.Services.LogDB.LogDB import LogDB
        myThread.logdbClient = LogDB(self.component.config.General.central_logdb_url,
                                     self.component.config.Agent.hostName,
                                     logger=logging)
    else:
        myThread.logdbClient = None
    return
def setUp(self):
    """
    _setUp_

    Setup the database and logging connection.
    """
    logging.basicConfig()
    self.logger = logging.getLogger('LogDBTest')
    # self.logger.setLevel(logging.DEBUG)
    url = os.environ.get('COUCHURL', 'http://localhost:5984') + '/logdb_t'
    identifier = 'agentname'
    self.agent_inst = LogDB(url, identifier, logger=self.logger, create=True)
    self.agent_inst2 = LogDB(url, identifier, logger=self.logger, create=True, thread_name="Test")
    identifier = '/DC=org/DC=doegrids/OU=People/CN=First Last 123'
    self.user_inst = LogDB(url, identifier, logger=self.logger, create=True)
    self.report = LogDBReport(self.agent_inst)
def setUp(self):
    """
    _setUp_

    Setup the database and logging connection.
    """
    logging.basicConfig()
    self.logger = logging.getLogger('LogDBTest')
    # self.logger.setLevel(logging.DEBUG)
    self.schema = []
    self.couchApps = ["LogDB"]
    self.testInit = TestInitCouchApp('LogDBTest')
    self.testInit.setLogging()
    self.testInit.setDatabaseConnection()
    self.testInit.setSchema(customModules=self.schema, useDefault=False)
    dbName = 'logdb_t'
    self.testInit.setupCouch(dbName, *self.couchApps)
    url = "%s/%s" % (self.testInit.couchUrl, dbName)
    identifier = 'agentname'
    self.agent_inst = LogDB(url, identifier, logger=self.logger, thread_name="MainThread")
    self.agent_inst2 = LogDB(url, identifier, logger=self.logger, thread_name="Test")
    identifier = '/DC=org/DC=doegrids/OU=People/CN=First Last 123'
    self.user_inst = LogDB(url, identifier, logger=self.logger, thread_name="MainThread")
    self.report = LogDBReport(self.agent_inst)
def advanceStatus(self, config):
    """
    Advance the request status based on the global workqueue elements status
    """
    reqmgrSvc = ReqMgr(config.reqmgr2_url, logger=self.logger)
    gqService = WorkQueue(config.workqueue_url)
    wmstatsSvc = WMStatsServer(config.wmstats_url, logger=self.logger)
    logdb = LogDB(config.central_logdb_url, config.log_reporter)

    self.logger.info("Getting GQ data for status check")
    wfStatusDict = gqService.getWorkflowStatusFromWQE()

    self.logger.info("Advancing status")
    moveForwardStatus(reqmgrSvc, wfStatusDict, self.logger)
    moveToCompletedForNoWQJobs(reqmgrSvc, wfStatusDict, self.logger)
    moveToArchived(wmstatsSvc, reqmgrSvc, logdb, config.archiveDelayHours, self.logger)

    self.logger.info("Done advancing status")
    return
class LogDBTasks(CherryPyPeriodicTask):

    def __init__(self, rest, config):
        super(LogDBTasks, self).__init__(config)
        self.logdb = LogDB(config.central_logdb_url, config.log_reporter)

    def setConcurrentTasks(self, config):
        """
        Set logDBCleanUp method to be executed in a polling cycle
        """
        self.concurrentTasks = [{'func': self.logDBCleanUp, 'duration': config.logDBCleanDuration}]

    def logDBCleanUp(self, config):
        """
        Clean up old documents from LogDB.
        """
        self.logger.info("Cleaning documents from LogDB WMStats")
        docs = self.logdb.cleanup(config.keepDocsAge)
        self.logger.info("Deleted %d old documents", len(docs))
        return
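All of the components above drive the same small LogDB client API that the test classes below exercise. A minimal standalone sketch of that usage follows; the URL, identifier and request name are made-up values, while the calls (post, get, get_all_requests, cleanup, delete) are the ones shown in the surrounding snippets.

# Minimal sketch of the LogDB client API used throughout this section.
import logging
from WMCore.Services.LogDB.LogDB import LogDB

logger = logging.getLogger("LogDBExample")
logdb = LogDB("http://localhost:5984/logdb_t", "agentname", logger=logger, create=True)

logdb.post("test_request", "job failures spotted", "warning")  # one document per (request, thread, type)
docs = logdb.get("test_request")       # fetch documents for this request
all_docs = logdb.get_all_requests()    # summary across all requests
logdb.cleanup(7 * 24 * 60 * 60)        # drop old documents; threshold mirrors config.logDBCleanDuration above
logdb.delete("test_request")           # remove documents for this request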
class ReqMgrService(TemplatedPage): """ Request Manager web service class """ def __init__(self, app, config, mount): self.base = config.base self.rootdir = '/'.join(WMCore.__file__.split('/')[:-1]) if config and not isinstance(config, dict): web_config = config.dictionary_() if not config: web_config = {'base': self.base} TemplatedPage.__init__(self, web_config) imgdir = os.environ.get('RM_IMAGESPATH', os.getcwd() + '/images') self.imgdir = web_config.get('imgdir', imgdir) cssdir = os.environ.get('RM_CSSPATH', os.getcwd() + '/css') self.cssdir = web_config.get('cssdir', cssdir) jsdir = os.environ.get('RM_JSPATH', os.getcwd() + '/js') self.jsdir = web_config.get('jsdir', jsdir) spdir = os.environ.get('RM_SPECPATH', os.getcwd() + '/specs') self.spdir = web_config.get('spdir', spdir) # read scripts area and initialize data-ops scripts self.sdir = os.environ.get('RM_SCRIPTS', os.getcwd() + '/scripts') self.sdir = web_config.get('sdir', self.sdir) self.sdict_thr = web_config.get('sdict_thr', 600) # put reasonable 10 min interval self.sdict = {'ts': time.time()} # placeholder for data-ops scripts self.update_scripts(force=True) # To be filled at run time self.cssmap = {} self.jsmap = {} self.imgmap = {} self.yuimap = {} std_specs_dir = os.path.join(self.rootdir, 'WMSpec/StdSpecs') self.std_specs = spec_list(std_specs_dir, 'WMSpec.StdSpecs') self.std_specs.sort() # Update CherryPy configuration mime_types = ['text/css'] mime_types += ['application/javascript', 'text/javascript', 'application/x-javascript', 'text/x-javascript'] cherryconf.update({'tools.encode.on': True, 'tools.gzip.on': True, 'tools.gzip.mime_types': mime_types, }) self._cache = {} # initialize rest API statedir = '/tmp' app = RESTMain(config, statedir) # REST application mount = '/rest' # mount point for cherrypy service api = RestApiHub(app, config.reqmgr, mount) # initialize access to reqmgr2 APIs self.reqmgr = ReqMgr(config.reqmgr.reqmgr2_url) # only gets current view (This might cause to reponse time much longer, # If upto date view is not needed overwrite Fale) self.reqmgr._noStale = True # admin helpers self.admin_info = Info(app, api, config.reqmgr, mount=mount + '/info') self.admin_group = Group(app, api, config.reqmgr, mount=mount + '/group') self.admin_team = Team(app, api, config.reqmgr, mount=mount + '/team') # get fields which we'll use in templates cdict = config.reqmgr.dictionary_() self.couch_url = cdict.get('couch_host', '') self.couch_dbname = cdict.get('couch_reqmgr_db', '') self.couch_wdbname = cdict.get('couch_workload_summary_db', '') self.acdc_url = cdict.get('acdc_host', '') self.acdc_dbname = cdict.get('acdc_db', '') self.configcache_url = cdict.get('couch_config_cache_url', self.couch_url) self.dbs_url = cdict.get('dbs_url', '') self.dqm_url = cdict.get('dqm_url', '') self.sw_ver = cdict.get('default_sw_version', 'CMSSW_7_6_1') self.sw_arch = cdict.get('default_sw_scramarch', 'slc6_amd64_gcc493') # LogDB holder centralurl = cdict.get("central_logdb_url", "") identifier = cdict.get("log_reporter", "reqmgr2") self.logdb = LogDB(centralurl, identifier) def user(self): """ Return user name associated with this instance. 
""" try: return cherrypy.request.user['login'] except: return 'testuser' def user_dn(self): "Return user DN" try: return cherrypy.request.user['dn'] except: return '/CN/bla/foo' def update_scripts(self, force=False): "Update scripts dict" if force or abs(time.time() - self.sdict['ts']) > self.sdict_thr: for item in os.listdir(self.sdir): with open(os.path.join(self.sdir, item), 'r') as istream: self.sdict[item.split('.')[0]] = istream.read() self.sdict['ts'] = time.time() def abs_page(self, tmpl, content): """generate abstract page""" menu = self.templatepage('menu', menus=menus(), tmpl=tmpl) body = self.templatepage('generic', menu=menu, content=content) page = self.templatepage('main', content=body, user=self.user()) return page def page(self, content): """ Provide page wrapped with top/bottom templates. """ return self.templatepage('main', content=content) def error(self, content): "Generate common error page" content = self.templatepage('error', content=content) return self.abs_page('error', content) @expose def index(self, **kwds): """Main page""" content = self.templatepage('index', requests=ACTIVE_STATUS, rdict=REQUEST_STATE_TRANSITION) return self.abs_page('main', content) @expose def home(self, **kwds): """Main page""" return self.index(**kwds) ### Admin actions ### @expose def admin(self, **kwds): """admin page""" print("\n### ADMIN PAGE") rows = self.admin_info.get() print("rows", [r for r in rows]) content = self.templatepage('admin') return self.abs_page('admin', content) @expose def add_user(self, **kwds): """add_user action""" rid = genid(kwds) status = "ok" # chagne to whatever it would be content = self.templatepage('confirm', ticket=rid, user=self.user(), status=status) return self.abs_page('admin', content) @expose def add_group(self, **kwds): """add_group action""" rows = self.admin_group.get() print("\n### GROUPS", [r for r in rows]) rid = genid(kwds) status = "ok" # chagne to whatever it would be content = self.templatepage('confirm', ticket=rid, user=self.user(), status=status) return self.abs_page('admin', content) @expose def add_team(self, **kwds): """add_team action""" rows = self.admin_team.get() print("\n### TEAMS", kwds, [r for r in rows]) print("request to add", kwds) rid = genid(kwds) status = "ok" # chagne to whatever it would be content = self.templatepage('confirm', ticket=rid, user=self.user(), status=status) return self.abs_page('admin', content) ### Request actions ### @expose @checkargs(['status', 'sort']) def assign(self, **kwds): """assign page""" if not kwds: kwds = {} if 'status' not in kwds: kwds.update({'status': 'assignment-approved'}) docs = [] attrs = ['RequestName', 'RequestDate', 'Group', 'Requestor', 'RequestStatus'] data = self.reqmgr.getRequestByStatus(statusList=[kwds['status']]) for key, val in data.items(): docs.append(request_attr(val, attrs)) sortby = kwds.get('sort', 'status') docs = [r for r in sort(docs, sortby)] misc_json = {'CMSSW Releases': releases(), 'CMSSW architectures': architectures(), 'SubscriptionPriority': ['Low', 'Normal', 'High'], 'CustodialSubType': ['Move', 'Replica'], 'NonCustodialSubType': ['Move', 'Replica'], 'MinMergeSize': 2147483648, 'MaxMergeSize': 4294967296, 'MaxMergeEvents': 50000, 'MaxRSS': 20411724, 'MaxVSize': 20411724, 'SoftTimeout': 129600, 'GracePeriod': 300, 'BlockCloseMaxWaitTime': 66400, 'BlockCloseMaxFiles': 500, 'BlockCloseMaxEvents': 250000000, 'BlockCloseMaxSize': 5000000000000, 'AcquisitionEra': '', 'ProcessingVersion': 1, 'ProcessingString': '', 'MergedLFNBase': lfn_bases(), 
'UnmergedLFNBase': lfn_unmerged_bases(), } filter_sort = self.templatepage('filter_sort') content = self.templatepage('assign', sort=sortby, filter_sort_table=filter_sort, sites=sites(), site_white_list=site_white_list(), site_black_list=site_black_list(), user=self.user(), user_dn=self.user_dn(), requests=docs, misc_table=json2table(misc_json, web_ui_names()), misc_json=json2form(misc_json, indent=2, keep_first_value=True)) return self.abs_page('assign', content) @expose @checkargs(['status', 'sort']) def approve(self, **kwds): """ Approve page: get list of request associated with user DN. Fetch their status list from ReqMgr and display if requests were seen by data-ops. """ if not kwds: kwds = {} if 'status' not in kwds: kwds.update({'status': 'new'}) kwds.update({'_nostale': True}) docs = [] attrs = ['RequestName', 'RequestDate', 'Group', 'Requestor', 'RequestStatus'] data = self.reqmgr.getRequestByStatus(statusList=[kwds['status']]) for key, val in data.items(): docs.append(request_attr(val, attrs)) sortby = kwds.get('sort', 'status') docs = [r for r in sort(docs, sortby)] filter_sort = self.templatepage('filter_sort') content = self.templatepage('approve', requests=docs, date=tstamp(), sort=sortby, filter_sort_table=filter_sort) return self.abs_page('approve', content) @expose def create(self, **kwds): """create page""" # get list of standard specs from WMCore and new ones from local area # loc_specs_dir = os.path.join(self.spdir, 'Specs') # local specs # loc_specs = spec_list(loc_specs_dir, 'Specs') # all_specs = list(set(self.std_specs + loc_specs)) # all_specs.sort() all_specs = list(self.std_specs) spec = kwds.get('form', '') if not spec: spec = self.std_specs[0] # make spec first in all_specs list if spec in all_specs: all_specs.remove(spec) all_specs = [spec] + all_specs jsondata = get_request_template_from_type(spec) # create templatized page out of provided forms self.update_scripts() content = self.templatepage('create', table=json2table(jsondata, web_ui_names()), jsondata=json2form(jsondata, indent=2, keep_first_value=True), name=spec, scripts=[s for s in self.sdict.keys() if s != 'ts'], specs=all_specs) return self.abs_page('create', content) def generate_objs(self, script, jsondict): """Generate objects from givem JSON template""" self.update_scripts() code = self.sdict.get(script, '') if code.find('def genobjs(jsondict)') == -1: return self.error( "Improper python snippet, your code should start with <b>def genobjs(jsondict)</b> function") exec (code) # code snippet must starts with genobjs function return [r for r in genobjs(jsondict)] @expose def config(self, name, **kwds): "Fetch config for given request name" return self.reqmgr.getConfig(name).replace('\n', '<br/>') @expose def fetch(self, rid, **kwds): "Fetch document for given id" rid = rid.replace('request-', '') doc = self.reqmgr.getRequestByNames(rid) transitions = [] tstamp = time.time() # get request tasks tasks = self.reqmgr.getRequestTasks(rid) if len(doc) == 1: try: doc = doc[rid] except: pass name = doc.get('RequestName', 'NA') title = 'Request %s' % name status = doc.get('RequestStatus', '') transitions = REQUEST_STATE_TRANSITION.get(status, []) if status in transitions: transitions.remove(status) visible_attrs = get_modifiable_properties(status) content = self.templatepage('doc', title=title, status=status, name=name, rid=rid, tasks=json2form(tasks, indent=2, keep_first_value=False), table=json2table(doc, web_ui_names(), visible_attrs), jsondata=json2form(doc, indent=2, keep_first_value=False), 
transitions=transitions, ts=tstamp, user=self.user(), userdn=self.user_dn()) elif len(doc) > 1: jsondata = [pprint.pformat(d) for d in doc] content = self.templatepage('doc', title='Series of docs: %s' % rid, table="", jsondata=jsondata, transitions=transitions, ts=tstamp, user=self.user(), userdn=self.user_dn()) else: doc = 'No request found for name=%s' % rid return self.abs_page('request', content) @expose def record2logdb(self, **kwds): """LogDB submission page""" print(kwds) request = kwds['request'] msg = kwds['message'] self.logdb.post(request, msg) msg = '<h6>Confirmation</h6>Your request has been entered to LogDB.' return self.abs_page('generic', msg) @expose def requests(self, **kwds): """Page showing requests""" if not kwds: kwds = {} if 'status' not in kwds: kwds.update({'status': 'acquired'}) results = self.reqmgr.getRequestByStatus(kwds['status']) docs = [] for key, doc in results.items(): docs.append(request_attr(doc)) sortby = kwds.get('sort', 'status') docs = [r for r in sort(docs, sortby)] filter_sort = self.templatepage('filter_sort') content = self.templatepage('requests', requests=docs, sort=sortby, status=kwds['status'], filter_sort_table=filter_sort) return self.abs_page('requests', content) @expose def request(self, **kwargs): "Get data example and expose it as json" dataset = kwargs.get('uinput', '') if not dataset: return {'error': 'no input dataset'} url = 'https://cmsweb.cern.ch/reqmgr/rest/outputdataset/%s' % dataset params = {} headers = {'Accept': 'application/json;text/json'} wdata = getdata(url, params) wdict = dict(date=time.ctime(), team='Team-A', status='Running', ID=genid(wdata)) winfo = self.templatepage('workflow', wdict=wdict, dataset=dataset, code=pprint.pformat(wdata)) content = self.templatepage('search', content=winfo) return self.abs_page('request', content) @expose def batch(self, **kwds): """batch page""" # TODO: we need a template for batch attributes # and read it from separate area, like DASMaps name = kwds.get('name', '') batch = {} if name: # batch = self.reqmgr.getBatchesByName(name) batch = {'Name': 'Batch1', 'Description': 'Bla-bla', 'Creator': 'valya', 'Group': 'test', 'Workflows': ['workflow1', 'workflow2'], 'Attributes': {'HeavyIon': ['true', 'false']}} attributes = batch.get('Attributes', {}) workflows = batch.get('Workflows', []) description = batch.get('Description', '') creator = batch.get('Creator', self.user_dn()) content = self.templatepage('batch', name=name, attributes=json2table(attributes, web_ui_names()), workflows=workflows, creator=creator, description=description) return self.abs_page('batch', content) @expose def batches(self, **kwds): """Page showing batches""" if not kwds: kwds = {} if 'name' not in kwds: kwds.update({'name': ''}) sortby = kwds.get('sort', 'name') # results = self.reqmgr.getBatchesByName(kwds['name']) results = [ {'Name': 'Batch1', 'Description': 'Bla-bla', 'Creator': 'valya', 'Group': 'test', 'Workflows': ['workflow1', 'workflow2'], 'Date': 'Fri Feb 13 10:36:41 EST 2015', 'Attributes': {'HeavyIon': ['true', 'false']}}, {'Name': 'Batch2', 'Description': 'lksdjflksjdf', 'Creator': 'valya', 'Group': 'test', 'Workflows': ['workflow1', 'workflow2'], 'Date': 'Fri Feb 10 10:36:41 EST 2015', 'Attributes': {'HeavyIon': ['true', 'false']}}, ] docs = [r for r in sort(results, sortby)] filter_sort = self.templatepage('filter_sort') content = self.templatepage('batches', batches=docs, sort=sortby, filter_sort_table=filter_sort) return self.abs_page('batches', content) ### Aux methods ### @expose def 
put_request(self, *args, **kwds): "PUT request callback to reqmgr server, should be used in AJAX" reqname = kwds.get('RequestName', '') status = kwds.get('RequestStatus', '') if not reqname: msg = 'Unable to update request status, empty request name' raise cherrypy.HTTPError(406, msg) if not status: msg = 'Unable to update request status, empty status value' raise cherrypy.HTTPError(406, msg) return self.reqmgr.updateRequestStatus(reqname, status) @expose def images(self, *args, **kwargs): """ Serve static images. """ args = list(args) self.check_scripts(args, self.imgmap, self.imgdir) mime_types = ['*/*', 'image/gif', 'image/png', 'image/jpg', 'image/jpeg'] accepts = cherrypy.request.headers.elements('Accept') for accept in accepts: if accept.value in mime_types and len(args) == 1 \ and args[0] in self.imgmap: image = self.imgmap[args[0]] # use image extension to pass correct content type ctype = 'image/%s' % image.split('.')[-1] cherrypy.response.headers['Content-type'] = ctype return serve_file(image, content_type=ctype) def serve(self, kwds, imap, idir, datatype='', minimize=False): "Serve files for high level APIs (yui/css/js)" args = [] for key, val in kwds.items(): if key == 'f': # we only look-up files from given kwds dict if isinstance(val, list): args += val else: args.append(val) scripts = self.check_scripts(args, imap, idir) return self.serve_files(args, scripts, imap, datatype, minimize) @exposecss @tools.gzip() def css(self, **kwargs): """ Serve provided CSS files. They can be passed as f=file1.css&f=file2.css """ resource = kwargs.get('resource', 'css') if resource == 'css': return self.serve(kwargs, self.cssmap, self.cssdir, 'css', True) @exposejs @tools.gzip() def js(self, **kwargs): """ Serve provided JS scripts. They can be passed as f=file1.js&f=file2.js with optional resource parameter to speficy type of JS files, e.g. resource=yui. """ resource = kwargs.get('resource', 'js') if resource == 'js': return self.serve(kwargs, self.jsmap, self.jsdir) def serve_files(self, args, scripts, resource, datatype='', minimize=False): """ Return asked set of files for JS, YUI, CSS. """ idx = "-".join(scripts) if idx not in self._cache.keys(): data = '' if datatype == 'css': data = '@CHARSET "UTF-8";' for script in args: path = os.path.join(sys.path[0], resource[script]) path = os.path.normpath(path) ifile = open(path) data = "\n".join([data, ifile.read(). \ replace('@CHARSET "UTF-8";', '')]) ifile.close() if datatype == 'css': set_headers("text/css") if minimize: self._cache[idx] = minify(data) else: self._cache[idx] = data return self._cache[idx] def check_scripts(self, scripts, resource, path): """ Check a script is known to the resource map and that the script actually exists """ for script in scripts: if script not in resource.keys(): spath = os.path.normpath(os.path.join(path, script)) if os.path.isfile(spath): resource.update({script: spath}) return scripts
class LogDBTest(unittest.TestCase):
    """
    _LogDBTest_
    """

    def setUp(self):
        """
        _setUp_

        Setup the database and logging connection.
        """
        logging.basicConfig()
        self.logger = logging.getLogger('LogDBTest')
        # self.logger.setLevel(logging.DEBUG)
        url = 'http://localhost:5984/logdb_t'
        identifier = 'agentname'
        self.agent_inst = LogDB(url, identifier, logger=self.logger, create=True)
        self.agent_inst2 = LogDB(url, identifier, logger=self.logger, create=True, thread_name="Test")
        identifier = '/DC=org/DC=doegrids/OU=People/CN=First Last 123'
        self.user_inst = LogDB(url, identifier, logger=self.logger, create=True)
        self.report = LogDBReport(self.agent_inst)

    def tearDown(self):
        """
        _tearDown_

        Drop all the WMBS tables.
        """
        self.agent_inst.backend.deleteDatabase()

    def test_docid(self):
        "Test docid API"
        request = 'abs'
        mtype = self.agent_inst.backend.prefix('info')
        res = self.agent_inst.backend.docid(request, mtype)
        key = '--'.join([request, self.agent_inst.identifier, self.agent_inst.thread_name, mtype])
        khash = gen_hash(key)
        self.assertEqual(res, khash)

    def test_prefix(self):
        "Test prefix LogDBBackend API"
        request = 'abc'
        self.assertEqual(self.agent_inst.agent, 1)
        mtype = self.agent_inst.backend.prefix('msg')
        self.assertEqual(mtype, 'agent-msg')
        self.assertEqual(self.user_inst.agent, 0)
        mtype = self.user_inst.backend.prefix('msg')
        self.assertEqual(mtype, 'msg')

    def test_apis(self):
        "Test LogDB APIs"
        request = 'abc'
        # if we post messages for the same request only the last one should survive
        self.agent_inst.post(request, 'msg1')
        # time.sleep(1)
        self.agent_inst.post(request, 'msg2')
        # time.sleep(1)
        self.agent_inst.post(request, 'msg3')
        # time.sleep(1)
        self.agent_inst.post(request, 'msg4')
        # time.sleep(1)
        docs = self.agent_inst.get(request)
        self.assertEqual(len(docs), 1)
        self.assertEqual(docs[0]['msg'], 'msg4')

        # add message as user, user based messages will grow
        self.user_inst.post(request, 'doc1')
        # time.sleep(1)
        docs = self.user_inst.get(request)
        self.assertEqual(len(docs), 2)  # we have msg4 and doc1
        self.user_inst.post(request, 'doc1')  # we have msg4 and two doc1 messages
        # time.sleep(1)
        docs = self.user_inst.get(request)
        self.assertEqual(len(docs), 3)

    def test_cleanup(self):
        "Test clean-up LogDB API"
        request = 'abc'
        self.agent_inst.post(request, 'msg1', 'info')
        self.agent_inst.post(request, 'msg2', 'comment')
        self.agent_inst.post(request, 'msg3', 'warning')
        self.agent_inst.post(request, 'msg4', 'error')
        all_docs = self.agent_inst.get(request)
        self.agent_inst.backend.cleanup(thr=10)  # look into past
        past_docs = self.agent_inst.get(request)
        self.assertEqual(len(all_docs), len(past_docs))
        self.agent_inst.backend.cleanup(thr=-10)  # look into future
        docs = self.agent_inst.get(request)
        self.assertEqual(len(docs), 0)

    def test_get_all_requests(self):
        "Test get_all_requests LogDB API"
        request = 'abc'
        self.agent_inst.post(request, 'msg1', 'info')
        self.agent_inst.post(request, 'msg2', 'info')
        self.agent_inst.post(request, 'msg3', 'warning')
        self.agent_inst.post(request, 'msg4', 'error')
        sum_docs = self.agent_inst.get(request)
        self.assertEqual(len(sum_docs), 3)  # since we have two info records
        request = 'abc2'
        self.agent_inst.post(request, 'msg1', 'info')
        all_docs = self.agent_inst.get_all_requests()
        self.assertEqual(len(all_docs), 2)  # we only have two distinct request names

    def test_delete(self):
        "Test delete LogDB API"
        request = 'abc'
        self.agent_inst.post(request, 'msg1', 'info')
        self.agent_inst.delete(request)
        all_docs = self.agent_inst.get_all_requests()
        self.assertEqual(len(all_docs), 0)

    def test_get(self):
        "Test get LogDB API"
        request = 'abc'
        self.agent_inst2.post(request, 'msg1', 'info')  # doc with different thread name
        self.agent_inst.post(request, 'msg1', 'info')
        self.agent_inst.post(request, 'msg2', 'info')
        self.agent_inst.post(request, 'msg3', 'warning')
        self.agent_inst.post(request, 'msg4', 'error')
        docs = self.agent_inst.get(request)
        self.assertEqual(len(docs), 4)  # there should be 4 docs: one for the Test thread and 3 for MainThread
        docs = self.agent_inst2.get(request)
        self.assertEqual(len(docs), 4)  # this should give the same result as above
        docs1 = self.agent_inst.get(request, 'info')
        docs2 = self.agent_inst.get(request, 'info')
        req1 = set([r['request'] for r in docs1])
        self.assertEqual(len(req1), 1)
        req2 = set([r['request'] for r in docs2])
        self.assertEqual(len(req2), 1)
        self.assertEqual(req1, req2)

    def test_thread_name(self):
        "Test thread name support by LogDB APIs"
        request = 'abc'
        self.agent_inst2.post(request, 'msg1', 'info')  # doc with different thread name
        self.agent_inst.post(request, 'msg1', 'info')
        self.agent_inst.post(request, 'msg2', 'info')
        self.agent_inst.post(request, 'msg3', 'warning')
        self.agent_inst.post(request, 'msg4', 'error')
        maindocs = self.agent_inst.get(request)
        self.assertEqual(len(maindocs), 4)  # since we have two info records in agent_inst
        thr_docs = self.agent_inst2.get(request)
        self.assertEqual(len(thr_docs), 4)  # number of docs in different thread
        req1 = set([r['request'] for r in maindocs])
        self.assertEqual(len(req1), 1)
        req2 = set([r['request'] for r in thr_docs])
        self.assertEqual(len(req2), 1)
        self.assertEqual(req1, req2)
        worker1 = set([r['thr'] for r in maindocs])
        self.assertEqual(worker1, set(["Test", "MainThread"]))
        worker2 = set([r['thr'] for r in thr_docs])
        self.assertEqual(worker2, set(["Test", "MainThread"]))
        docs = set([r['identifier'] for r in maindocs])
        self.assertEqual(docs, set([self.agent_inst.identifier]))
        docs = set([r['identifier'] for r in thr_docs])
        self.assertEqual(docs, set([self.agent_inst.identifier]))

    def test_checks(self):
        "Tests LogDB check/mtype functionality"
        request = 'abc'
        res = self.agent_inst.post(request, 'msg', 'ingo')  # wrong type; should be silent, i.e. no exceptions
        self.assertEqual('post-error', res)
        args = (request, 'msg', 'ingo')  # wrong type
        self.assertRaises(LogDBError, self.agent_inst.backend.agent_update, *args)
        args = (request, 'ingo')  # wrong type
        self.assertRaises(LogDBError, self.agent_inst.backend.check, *args)
        args = ('', 'msg', 'info')  # empty request string
        self.assertRaises(LogDBError, self.agent_inst.backend.agent_update, *args)
        args = ('', 'info')  # empty request string
        self.assertRaises(LogDBError, self.agent_inst.backend.check, *args)

    def test_report(self):
        "Test LogDBReport APIs"
        request = 'abc'
        self.agent_inst.post(request, 'msg1', 'info')
        time.sleep(1)
        self.agent_inst.post(request, 'msg2', 'comment')
        time.sleep(1)
        self.agent_inst.post(request, 'msg3', 'warning')
        time.sleep(1)
        self.agent_inst.post(request, 'msg4', 'error')
        time.sleep(1)
        self.report.to_stdout(request)
        out = self.report.to_txt(request)
        print("\n### txt report\n", out)
        out = self.report.to_html(request)
        print("\n### html report\n", out)
        docs = self.agent_inst.get(request)
        tdocs = self.report.orderby(docs, order='ts')
        messages = [d['msg'] for d in tdocs]
        self.assertEqual(messages, ['msg4', 'msg3', 'msg2', 'msg1'])
def __call__(self, parameters):
    """
    Thread entry point; handles synchronisation with run and terminate conditions
    """
    try:
        msg = "Initialising worker thread %s" % str(self)
        logging.info(msg)

        # Call thread startup method
        self.initInThread(parameters)

        msg = "Worker thread %s started" % str(self)
        logging.info(msg)

        # heartbeat needed to be called after self.initInThread
        # to get the right name
        myThread = threading.currentThread()
        if hasattr(self.component.config, "Agent"):
            self.useHeartbeat = getattr(self.component.config.Agent, "useHeartbeat", True)
        self.workerName = myThread.getName()

        if hasattr(self.component.config, "General") and \
                hasattr(self.component.config.General, "central_logdb_url") and \
                hasattr(self.component.config, "Agent"):
            from WMCore.Services.LogDB.LogDB import LogDB
            myThread.logdbClient = LogDB(self.component.config.General.central_logdb_url,
                                         self.component.config.Agent.hostName, logger=logging)
        else:
            myThread.logdbClient = None

        if self.useHeartbeat:
            self.heartbeatAPI.updateWorkerHeartbeat(self.workerName)

        # Run event loop while termination is not flagged
        while not self.notifyTerminate.isSet():
            # Check manager hasn't paused threads
            if self.notifyPause.isSet():
                self.notifyResume.wait()
            else:
                # Catch case where threads were paused and then terminated
                # - threads should not run in this case!
                if not self.notifyTerminate.isSet():
                    # Do some work!
                    try:
                        try:
                            # heartbeat needed to be called after self.initInThread
                            # to get the right name
                            if self.useHeartbeat:
                                self.heartbeatAPI.updateWorkerHeartbeat(self.workerName, "Running")
                        except Exception as ex:
                            msg = " Failed to update heartbeat for worker %s" % str(self)
                            msg += ":\n %s" % str(ex)
                            msg += "\n Skipping worker algorithm!"
                            logging.error(msg)
                        else:
                            self.algorithm(parameters)

                        # Catch if someone forgets to commit/rollback
                        if myThread.transaction.transaction is not None:
                            msg = """ Thread %s: Transaction reached end of poll loop.""" % self.workerName
                            msg += " Raise a bug against me. Rollback."
                            logging.error(msg)
                            myThread.transaction.rollback()
                    except Exception as ex:
                        if myThread.transaction.transaction is not None:
                            myThread.transaction.rollback()
                        msg = "Error in worker algorithm (1):\nBacktrace:\n "
                        msg += (" %s %s" % (str(self), str(ex)))
                        stackTrace = traceback.format_tb(sys.exc_info()[2], None)
                        for stackFrame in stackTrace:
                            msg += stackFrame
                        logging.error(msg)
                        # force entire component to terminate
                        try:
                            self.component.prepareToStop()
                        except Exception as ex:
                            logging.error("Failed to halt component after worker crash: %s", str(ex))
                        if self.useHeartbeat:
                            self.heartbeatAPI.updateWorkerError(self.workerName, msg)
                        raise ex

                # Put the thread to sleep
                self.sleepThread()

        # Call specific thread termination method
        self.terminate(parameters)
    except Exception as ex:
        # Notify error
        msg = "Error in event loop (2): %s %s\nBacktrace:\n"
        msg = msg % (str(self), str(ex))
        stackTrace = traceback.format_tb(sys.exc_info()[2], None)
        for stackFrame in stackTrace:
            msg += stackFrame
        logging.error(msg)

    # Indicate to manager that thread is done
    self.terminateCallback(threading.currentThread().name)

    # All done
    msg = "Worker thread %s terminated" % str(self)
    logging.info(msg)
def __init__(self, app, config, mount): self.base = config.base self.rootdir = '/'.join(WMCore.__file__.split('/')[:-1]) if config and not isinstance(config, dict): web_config = config.dictionary_() if not config: web_config = {'base': self.base} TemplatedPage.__init__(self, web_config) imgdir = os.environ.get('RM_IMAGESPATH', os.getcwd() + '/images') self.imgdir = web_config.get('imgdir', imgdir) cssdir = os.environ.get('RM_CSSPATH', os.getcwd() + '/css') self.cssdir = web_config.get('cssdir', cssdir) jsdir = os.environ.get('RM_JSPATH', os.getcwd() + '/js') self.jsdir = web_config.get('jsdir', jsdir) spdir = os.environ.get('RM_SPECPATH', os.getcwd() + '/specs') self.spdir = web_config.get('spdir', spdir) # read scripts area and initialize data-ops scripts self.sdir = os.environ.get('RM_SCRIPTS', os.getcwd() + '/scripts') self.sdir = web_config.get('sdir', self.sdir) self.sdict_thr = web_config.get('sdict_thr', 600) # put reasonable 10 min interval self.sdict = {'ts': time.time()} # placeholder for data-ops scripts self.update_scripts(force=True) # To be filled at run time self.cssmap = {} self.jsmap = {} self.imgmap = {} self.yuimap = {} std_specs_dir = os.path.join(self.rootdir, 'WMSpec/StdSpecs') self.std_specs = spec_list(std_specs_dir, 'WMSpec.StdSpecs') self.std_specs.sort() # Update CherryPy configuration mime_types = ['text/css'] mime_types += ['application/javascript', 'text/javascript', 'application/x-javascript', 'text/x-javascript'] cherryconf.update({'tools.encode.on': True, 'tools.gzip.on': True, 'tools.gzip.mime_types': mime_types, }) self._cache = {} # initialize rest API statedir = '/tmp' app = RESTMain(config, statedir) # REST application mount = '/rest' # mount point for cherrypy service api = RestApiHub(app, config.reqmgr, mount) # initialize access to reqmgr2 APIs self.reqmgr = ReqMgr(config.reqmgr.reqmgr2_url) # only gets current view (This might cause to reponse time much longer, # If upto date view is not needed overwrite Fale) self.reqmgr._noStale = True # admin helpers self.admin_info = Info(app, api, config.reqmgr, mount=mount + '/info') self.admin_group = Group(app, api, config.reqmgr, mount=mount + '/group') self.admin_team = Team(app, api, config.reqmgr, mount=mount + '/team') # get fields which we'll use in templates cdict = config.reqmgr.dictionary_() self.couch_url = cdict.get('couch_host', '') self.couch_dbname = cdict.get('couch_reqmgr_db', '') self.couch_wdbname = cdict.get('couch_workload_summary_db', '') self.acdc_url = cdict.get('acdc_host', '') self.acdc_dbname = cdict.get('acdc_db', '') self.configcache_url = cdict.get('couch_config_cache_url', self.couch_url) self.dbs_url = cdict.get('dbs_url', '') self.dqm_url = cdict.get('dqm_url', '') self.sw_ver = cdict.get('default_sw_version', 'CMSSW_7_6_1') self.sw_arch = cdict.get('default_sw_scramarch', 'slc6_amd64_gcc493') # LogDB holder centralurl = cdict.get("central_logdb_url", "") identifier = cdict.get("log_reporter", "reqmgr2") self.logdb = LogDB(centralurl, identifier)
def __init__(self, app, config, mount): self.base = config.base self.rootdir = '/'.join(WMCore.__file__.split('/')[:-1]) if config and not isinstance(config, dict): web_config = config.dictionary_() if not config: web_config = {'base': self.base} TemplatedPage.__init__(self, web_config) imgdir = os.environ.get('RM_IMAGESPATH', os.getcwd() + '/images') self.imgdir = web_config.get('imgdir', imgdir) cssdir = os.environ.get('RM_CSSPATH', os.getcwd() + '/css') self.cssdir = web_config.get('cssdir', cssdir) jsdir = os.environ.get('RM_JSPATH', os.getcwd() + '/js') self.jsdir = web_config.get('jsdir', jsdir) spdir = os.environ.get('RM_SPECPATH', os.getcwd() + '/specs') self.spdir = web_config.get('spdir', spdir) # read scripts area and initialize data-ops scripts self.sdir = os.environ.get('RM_SCRIPTS', os.getcwd() + '/scripts') self.sdir = web_config.get('sdir', self.sdir) self.sdict_thr = web_config.get('sdict_thr', 600) # put reasonable 10 min interval self.sdict = {'ts': time.time()} # placeholder for data-ops scripts self.update_scripts(force=True) # To be filled at run time self.cssmap = {} self.jsmap = {} self.imgmap = {} self.yuimap = {} std_specs_dir = os.path.join(self.rootdir, 'WMSpec/StdSpecs') self.std_specs = spec_list(std_specs_dir, 'WMSpec.StdSpecs') self.std_specs.sort() # Update CherryPy configuration mime_types = ['text/css'] mime_types += [ 'application/javascript', 'text/javascript', 'application/x-javascript', 'text/x-javascript' ] cherryconf.update({ 'tools.encode.on': True, 'tools.gzip.on': True, 'tools.gzip.mime_types': mime_types, }) self._cache = {} # initialize rest API statedir = '/tmp' app = RESTMain(config, statedir) # REST application mount = '/rest' # mount point for cherrypy service api = RestApiHub(app, config.reqmgr, mount) # initialize access to reqmgr2 APIs self.reqmgr = ReqMgr(config.reqmgr.reqmgr2_url) # only gets current view (This might cause to reponse time much longer, # If upto date view is not needed overwrite Fale) self.reqmgr._noStale = True # admin helpers self.admin_info = Info(app, api, config.reqmgr, mount=mount + '/info') self.admin_group = Group(app, api, config.reqmgr, mount=mount + '/group') self.admin_team = Team(app, api, config.reqmgr, mount=mount + '/team') # get fields which we'll use in templates cdict = config.reqmgr.dictionary_() self.couch_url = cdict.get('couch_host', '') self.couch_dbname = cdict.get('couch_reqmgr_db', '') self.couch_wdbname = cdict.get('couch_workload_summary_db', '') self.acdc_url = cdict.get('acdc_host', '') self.acdc_dbname = cdict.get('acdc_db', '') self.configcache_url = cdict.get('couch_config_cache_url', self.couch_url) self.dbs_url = cdict.get('dbs_url', '') self.dqm_url = cdict.get('dqm_url', '') self.sw_ver = cdict.get('default_sw_version', 'CMSSW_5_2_5') self.sw_arch = cdict.get('default_sw_scramarch', 'slc5_amd64_gcc434') # LogDB holder centralurl = cdict.get("central_logdb_url", "") identifier = cdict.get("log_reporter", "reqmgr2") self.logdb = LogDB(centralurl, identifier)
class LogDBTest(unittest.TestCase):
    """
    _LogDBTest_
    """

    def setUp(self):
        """
        _setUp_

        Setup the database and logging connection.
        """
        logging.basicConfig()
        self.logger = logging.getLogger('LogDBTest')
        # self.logger.setLevel(logging.DEBUG)
        self.schema = []
        self.couchApps = ["LogDB"]
        self.testInit = TestInitCouchApp('LogDBTest')
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=self.schema, useDefault=False)
        dbName = 'logdb_t'
        self.testInit.setupCouch(dbName, *self.couchApps)
        url = "%s/%s" % (self.testInit.couchUrl, dbName)
        identifier = 'agentname'
        self.agent_inst = LogDB(url, identifier, logger=self.logger, thread_name="MainThread")
        self.agent_inst2 = LogDB(url, identifier, logger=self.logger, thread_name="Test")
        identifier = '/DC=org/DC=doegrids/OU=People/CN=First Last 123'
        self.user_inst = LogDB(url, identifier, logger=self.logger, thread_name="MainThread")
        self.report = LogDBReport(self.agent_inst)

    def tearDown(self):
        """
        _tearDown_

        Drop all the WMBS tables.
        """
        self.testInit.tearDownCouch()

    def test_docid(self):
        "Test docid API"
        request = 'abs'
        mtype = self.agent_inst.backend.prefix('info')
        res = self.agent_inst.backend.docid(request, mtype)
        key = '--'.join([request, self.agent_inst.identifier, self.agent_inst.thread_name, mtype])
        khash = gen_hash(key)
        self.assertEqual(res, khash)

    def test_prefix(self):
        "Test prefix LogDBBackend API"
        self.assertEqual(self.agent_inst.agent, 1)
        mtype = self.agent_inst.backend.prefix('msg')
        self.assertEqual(mtype, 'agent-msg')
        self.assertEqual(self.user_inst.agent, 0)
        mtype = self.user_inst.backend.prefix('msg')
        self.assertEqual(mtype, 'msg')

    def test_apis(self):
        "Test LogDB APIs"
        request = 'abc'
        # if we post messages for the same request only the last one should survive
        self.agent_inst.post(request, 'msg1')
        # time.sleep(1)
        self.agent_inst.post(request, 'msg2')
        # time.sleep(1)
        self.agent_inst.post(request, 'msg3')
        # time.sleep(1)
        self.agent_inst.post(request, 'msg4')
        # time.sleep(1)
        docs = self.agent_inst.get(request)
        self.assertEqual(len(docs), 1)
        self.assertEqual(docs[0]['msg'], 'msg4')

        # add message as user, user based messages will grow
        self.user_inst.post(request, 'doc1')
        # time.sleep(1)
        docs = self.user_inst.get(request)
        self.assertEqual(len(docs), 2)  # we have msg4 and doc1
        self.user_inst.post(request, 'doc1')  # we have msg4 and two doc1 messages
        # time.sleep(1)
        docs = self.user_inst.get(request)
        self.assertEqual(len(docs), 3)

    def test_cleanup(self):
        "Test clean-up LogDB API"
        request = 'abc'
        self.agent_inst.post(request, 'msg1', 'info')
        self.agent_inst.post(request, 'msg2', 'comment')
        self.agent_inst.post(request, 'msg3', 'warning')
        self.agent_inst.post(request, 'msg4', 'error')
        all_docs = self.agent_inst.get(request)
        self.agent_inst.backend.cleanup(thr=10)  # look into past
        past_docs = self.agent_inst.get(request)
        self.assertEqual(len(all_docs), len(past_docs))
        self.agent_inst.backend.cleanup(thr=-10)  # look into future
        docs = self.agent_inst.get(request)
        self.assertEqual(len(docs), 0)

    def test_get_all_requests(self):
        "Test get_all_requests LogDB API"
        request = 'abc'
        self.agent_inst.post(request, 'msg1', 'info')
        self.agent_inst.post(request, 'msg2', 'info')
        self.agent_inst.post(request, 'msg3', 'warning')
        self.agent_inst.post(request, 'msg4', 'error')
        sum_docs = self.agent_inst.get(request)
        self.assertEqual(len(sum_docs), 3)  # since we have two info records
        request = 'abc2'
        self.agent_inst.post(request, 'msg1', 'info')
        all_docs = self.agent_inst.get_all_requests()
        self.assertEqual(len(all_docs), 2)  # we only have two distinct request names

    def test_delete(self):
        "Test delete LogDB API"
        request = 'abc'
        self.agent_inst.post(request, 'msg1', 'info')
        self.agent_inst.delete(request)
        all_docs = self.agent_inst.get_all_requests()
        self.assertEqual(len(all_docs), 0)

    def test_get(self):
        "Test get LogDB API"
        request = 'abc'
        self.agent_inst2.post(request, 'msg1', 'info')  # doc with different thread name
        self.agent_inst.post(request, 'msg1', 'info')
        self.agent_inst.post(request, 'msg2', 'info')
        self.agent_inst.post(request, 'msg3', 'warning')
        self.agent_inst.post(request, 'msg4', 'error')
        docs = self.agent_inst.get(request)
        self.assertEqual(len(docs), 4)  # there should be 4 docs: one for the Test thread and 3 for MainThread
        docs = self.agent_inst2.get(request)
        self.assertEqual(len(docs), 4)  # this should give the same result as above
        docs1 = self.agent_inst.get(request, 'info')
        docs2 = self.agent_inst.get(request, 'info')
        req1 = set([r['request'] for r in docs1])
        self.assertEqual(len(req1), 1)
        req2 = set([r['request'] for r in docs2])
        self.assertEqual(len(req2), 1)
        self.assertEqual(req1, req2)

    def test_thread_name(self):
        "Test thread name support by LogDB APIs"
        request = 'abc'
        self.agent_inst2.post(request, 'msg1', 'info')  # doc with different thread name
        self.agent_inst.post(request, 'msg1', 'info')
        self.agent_inst.post(request, 'msg2', 'info')
        self.agent_inst.post(request, 'msg3', 'warning')
        self.agent_inst.post(request, 'msg4', 'error')
        maindocs = self.agent_inst.get(request)
        self.assertEqual(len(maindocs), 4)  # since we have two info records in agent_inst
        thr_docs = self.agent_inst2.get(request)
        self.assertEqual(len(thr_docs), 4)  # number of docs in different thread
        req1 = set([r['request'] for r in maindocs])
        self.assertEqual(len(req1), 1)
        req2 = set([r['request'] for r in thr_docs])
        self.assertEqual(len(req2), 1)
        self.assertEqual(req1, req2)
        worker1 = set([r['thr'] for r in maindocs])
        self.assertEqual(worker1, set(["Test", "MainThread"]))
        worker2 = set([r['thr'] for r in thr_docs])
        self.assertEqual(worker2, set(["Test", "MainThread"]))
        docs = set([r['identifier'] for r in maindocs])
        self.assertEqual(docs, set([self.agent_inst.identifier]))
        docs = set([r['identifier'] for r in thr_docs])
        self.assertEqual(docs, set([self.agent_inst.identifier]))

    def test_checks(self):
        "Tests LogDB check/mtype functionality"
        request = 'abc'
        res = self.agent_inst.post(request, 'msg', 'ingo')  # wrong type; should be silent, i.e. no exceptions
        self.assertEqual('post-error', res)
        args = (request, 'msg', 'ingo')  # wrong type
        self.assertRaises(LogDBError, self.agent_inst.backend.agent_update, *args)
        args = (request, 'ingo')  # wrong type
        self.assertRaises(LogDBError, self.agent_inst.backend.check, *args)
        args = ('', 'msg', 'info')  # empty request string
        self.assertRaises(LogDBError, self.agent_inst.backend.agent_update, *args)
        args = ('', 'info')  # empty request string
        self.assertRaises(LogDBError, self.agent_inst.backend.check, *args)

    def test_report(self):
        "Test LogDBReport APIs"
        request = 'abc'
        self.agent_inst.post(request, 'msg1', 'info')
        time.sleep(1)
        self.agent_inst.post(request, 'msg2', 'comment')
        time.sleep(1)
        self.agent_inst.post(request, 'msg3', 'warning')
        time.sleep(1)
        self.agent_inst.post(request, 'msg4', 'error')
        time.sleep(1)
        self.report.to_stdout(request)
        out = self.report.to_txt(request)
        print("\n### txt report\n", out)
        out = self.report.to_html(request)
        print("\n### html report\n", out)
        docs = self.agent_inst.get(request)
        tdocs = self.report.orderby(docs, order='ts')
        messages = [d['msg'] for d in tdocs]
        self.assertEqual(messages, ['msg4', 'msg3', 'msg2', 'msg1'])

    def test_heartbeat(self):
        "Test heartbeat_report function"
        threadName = 'MainThread'
        failThread = 'FailThread'
        self.agent_inst.post(msg="Test heartbeat", mtype="info")
        time.sleep(1)
        self.agent_inst.post(msg="Test heartbeatError", mtype="error")
        report = self.agent_inst.heartbeat_report()
        self.assertEqual(report[threadName]['type'], 'agent-error')
        report = self.agent_inst.wmstats_down_components_report([threadName])
        self.assertEqual(report['down_components'], [threadName])
        self.agent_inst.post(msg="Test heartbeat", mtype="info")
        report = self.agent_inst.wmstats_down_components_report([threadName])
        self.assertEqual(report['down_components'], [])
        report = self.agent_inst.wmstats_down_components_report([threadName, failThread])
        self.assertEqual(report['down_components'], [failThread])
        self.agent_inst.post(msg="Test heartbeatError2", mtype="error")
        report = self.agent_inst.wmstats_down_components_report([threadName])
        self.assertEqual(report['down_components'], [threadName])
        self.agent_inst.delete(mtype="error", this_thread=True)
        report = self.agent_inst.wmstats_down_components_report([threadName])
        self.assertEqual(report['down_components'], [])
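The heartbeat test above shows how agent components advertise liveness: each thread posts to LogDB under its own thread name and monitoring reads the latest entry back. A condensed sketch of that reporting pattern, assuming a local test database (the URL and identifier are made up; the calls are the ones exercised in the test):

# Condensed heartbeat-reporting sketch based on the test above; URL and
# identifier are made-up values.
import logging
from WMCore.Services.LogDB.LogDB import LogDB

logdb = LogDB("http://localhost:5984/logdb_t", "agentname", logger=logging.getLogger("AgentExample"))

logdb.post(msg="component is alive", mtype="info")      # heartbeat for the current thread
logdb.post(msg="database unreachable", mtype="error")   # an error heartbeat marks the thread as down

report = logdb.heartbeat_report()                       # latest message type per thread
down = logdb.wmstats_down_components_report(["MainThread"])
print(down['down_components'])                          # ['MainThread'] while the error is the latest entry

logdb.delete(mtype="error", this_thread=True)           # clear the error once the component recovers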
def __init__(self, app, config, mount): self.base = config.base self.rootdir = '/'.join(WMCore.__file__.split('/')[:-1]) if config and not isinstance(config, dict): web_config = config.dictionary_() if not config: web_config = {'base': self.base} TemplatedPage.__init__(self, web_config) imgdir = os.environ.get('RM_IMAGESPATH', os.getcwd() + '/images') self.imgdir = web_config.get('imgdir', imgdir) cssdir = os.environ.get('RM_CSSPATH', os.getcwd() + '/css') self.cssdir = web_config.get('cssdir', cssdir) jsdir = os.environ.get('RM_JSPATH', os.getcwd() + '/js') self.jsdir = web_config.get('jsdir', jsdir) spdir = os.environ.get('RM_SPECPATH', os.getcwd() + '/specs') self.spdir = web_config.get('spdir', spdir) # read scripts area and initialize data-ops scripts self.sdir = os.environ.get('RM_SCRIPTS', os.getcwd() + '/scripts') self.sdir = web_config.get('sdir', self.sdir) self.sdict_thr = web_config.get('sdict_thr', 600) # put reasonable 10 min interval self.sdict = {'ts': time.time()} # placeholder for data-ops scripts self.update_scripts(force=True) # To be filled at run time self.cssmap = {} self.jsmap = {} self.imgmap = {} self.yuimap = {} std_specs_dir = os.path.join(self.rootdir, 'WMSpec/StdSpecs') self.std_specs = spec_list(std_specs_dir) self.std_specs.sort() # Update CherryPy configuration mime_types = ['text/css'] mime_types += [ 'application/javascript', 'text/javascript', 'application/x-javascript', 'text/x-javascript' ] cherryconf.update({ 'tools.encode.on': True, 'tools.gzip.on': True, 'tools.gzip.mime_types': mime_types, }) self._cache = {} # initialize access to reqmgr2 APIs self.reqmgr_url = config.reqmgr.reqmgr2_url self.reqmgr = ReqMgr(self.reqmgr_url) # only gets current view (This might cause to reponse time much longer, # If upto date view is not needed overwrite Fale) self.reqmgr._noStale = True # get fields which we'll use in templates cdict = config.reqmgr.dictionary_() self.couch_url = cdict.get('couch_host', '') self.couch_dbname = cdict.get('couch_reqmgr_db', '') self.couch_wdbname = cdict.get('couch_workload_summary_db', '') self.acdc_url = cdict.get('acdc_host', '') self.acdc_dbname = cdict.get('acdc_db', '') self.configcache_url = cdict.get('couch_config_cache_url', self.couch_url) self.dbs_url = cdict.get('dbs_url', '') self.dqm_url = cdict.get('dqm_url', '') self.sw_ver = cdict.get('default_sw_version', 'CMSSW_7_6_1') self.sw_arch = cdict.get('default_sw_scramarch', 'slc6_amd64_gcc493') # LogDB holder centralurl = cdict.get("central_logdb_url", "") identifier = cdict.get("log_reporter", "reqmgr2") self.logdb = LogDB(centralurl, identifier) # local team cache which will request data from wmstats base, uri = self.reqmgr_url.split('://') base_url = '%s://%s' % (base, uri.split('/')[0]) self.wmstatsurl = cdict.get('wmstats_url', '%s/wmstatsserver' % base_url) if not self.wmstatsurl: raise Exception( 'ReqMgr2 configuration file does not provide wmstats url') self.team_cache = [] # fetch assignment arguments specification from StdBase self.assignArgs = StdBase().getWorkloadAssignArgs() self.assignArgs = { key: val['default'] for key, val in self.assignArgs.items() }
def __init__(self, app, config, mount): self.base = config.base self.rootdir = '/'.join(WMCore.__file__.split('/')[:-1]) if config and not isinstance(config, dict): web_config = config.dictionary_() if not config: web_config = {'base': self.base} TemplatedPage.__init__(self, web_config) imgdir = os.environ.get('RM_IMAGESPATH', os.getcwd() + '/images') self.imgdir = web_config.get('imgdir', imgdir) cssdir = os.environ.get('RM_CSSPATH', os.getcwd() + '/css') self.cssdir = web_config.get('cssdir', cssdir) jsdir = os.environ.get('RM_JSPATH', os.getcwd() + '/js') self.jsdir = web_config.get('jsdir', jsdir) spdir = os.environ.get('RM_SPECPATH', os.getcwd() + '/specs') self.spdir = web_config.get('spdir', spdir) # read scripts area and initialize data-ops scripts self.sdir = os.environ.get('RM_SCRIPTS', os.getcwd() + '/scripts') self.sdir = web_config.get('sdir', self.sdir) self.sdict_thr = web_config.get('sdict_thr', 600) # put reasonable 10 min interval self.sdict = {'ts': time.time()} # placeholder for data-ops scripts self.update_scripts(force=True) # To be filled at run time self.cssmap = {} self.jsmap = {} self.imgmap = {} self.yuimap = {} std_specs_dir = os.path.join(self.rootdir, 'WMSpec/StdSpecs') self.std_specs = spec_list(std_specs_dir) self.std_specs.sort() # Update CherryPy configuration mime_types = ['text/css'] mime_types += ['application/javascript', 'text/javascript', 'application/x-javascript', 'text/x-javascript'] cherryconf.update({'tools.encode.on': True, 'tools.gzip.on': True, 'tools.gzip.mime_types': mime_types, }) self._cache = {} # initialize access to reqmgr2 APIs self.reqmgr_url = config.reqmgr.reqmgr2_url self.reqmgr = ReqMgr(self.reqmgr_url) # only gets current view (This might cause to reponse time much longer, # If upto date view is not needed overwrite Fale) self.reqmgr._noStale = True # get fields which we'll use in templates cdict = config.reqmgr.dictionary_() self.couch_url = cdict.get('couch_host', '') self.couch_dbname = cdict.get('couch_reqmgr_db', '') self.couch_wdbname = cdict.get('couch_workload_summary_db', '') self.acdc_url = cdict.get('acdc_host', '') self.acdc_dbname = cdict.get('acdc_db', '') self.configcache_url = cdict.get('couch_config_cache_url', self.couch_url) self.dbs_url = cdict.get('dbs_url', '') self.dqm_url = cdict.get('dqm_url', '') self.sw_ver = cdict.get('default_sw_version', 'CMSSW_7_6_1') self.sw_arch = cdict.get('default_sw_scramarch', 'slc6_amd64_gcc493') # LogDB holder centralurl = cdict.get("central_logdb_url", "") identifier = cdict.get("log_reporter", "reqmgr2") self.logdb = LogDB(centralurl, identifier) # local team cache which will request data from wmstats base, uri = self.reqmgr_url.split('://') base_url = '%s://%s' % (base, uri.split('/')[0]) self.wmstatsurl = cdict.get('wmstats_url', '%s/wmstatsserver' % base_url) if not self.wmstatsurl: raise Exception('ReqMgr2 configuration file does not provide wmstats url') self.team_cache = [] # fetch assignment arguments specification from StdBase self.assignArgs = StdBase().getWorkloadAssignArgs() self.assignArgs = {key: val['default'] for key, val in self.assignArgs.items()}
class ReqMgrService(TemplatedPage): """ Request Manager web service class """ def __init__(self, app, config, mount): self.base = config.base self.rootdir = '/'.join(WMCore.__file__.split('/')[:-1]) if config and not isinstance(config, dict): web_config = config.dictionary_() if not config: web_config = {'base': self.base} TemplatedPage.__init__(self, web_config) imgdir = os.environ.get('RM_IMAGESPATH', os.getcwd() + '/images') self.imgdir = web_config.get('imgdir', imgdir) cssdir = os.environ.get('RM_CSSPATH', os.getcwd() + '/css') self.cssdir = web_config.get('cssdir', cssdir) jsdir = os.environ.get('RM_JSPATH', os.getcwd() + '/js') self.jsdir = web_config.get('jsdir', jsdir) spdir = os.environ.get('RM_SPECPATH', os.getcwd() + '/specs') self.spdir = web_config.get('spdir', spdir) # read scripts area and initialize data-ops scripts self.sdir = os.environ.get('RM_SCRIPTS', os.getcwd() + '/scripts') self.sdir = web_config.get('sdir', self.sdir) self.sdict_thr = web_config.get('sdict_thr', 600) # put reasonable 10 min interval self.sdict = {'ts': time.time()} # placeholder for data-ops scripts self.update_scripts(force=True) # To be filled at run time self.cssmap = {} self.jsmap = {} self.imgmap = {} self.yuimap = {} std_specs_dir = os.path.join(self.rootdir, 'WMSpec/StdSpecs') self.std_specs = spec_list(std_specs_dir) self.std_specs.sort() # Update CherryPy configuration mime_types = ['text/css'] mime_types += ['application/javascript', 'text/javascript', 'application/x-javascript', 'text/x-javascript'] cherryconf.update({'tools.encode.on': True, 'tools.gzip.on': True, 'tools.gzip.mime_types': mime_types, }) self._cache = {} # initialize access to reqmgr2 APIs self.reqmgr_url = config.reqmgr.reqmgr2_url self.reqmgr = ReqMgr(self.reqmgr_url) # only gets current view (This might cause to reponse time much longer, # If upto date view is not needed overwrite Fale) self.reqmgr._noStale = True # get fields which we'll use in templates cdict = config.reqmgr.dictionary_() self.couch_url = cdict.get('couch_host', '') self.couch_dbname = cdict.get('couch_reqmgr_db', '') self.couch_wdbname = cdict.get('couch_workload_summary_db', '') self.acdc_url = cdict.get('acdc_host', '') self.acdc_dbname = cdict.get('acdc_db', '') self.configcache_url = cdict.get('couch_config_cache_url', self.couch_url) self.dbs_url = cdict.get('dbs_url', '') self.dqm_url = cdict.get('dqm_url', '') self.sw_ver = cdict.get('default_sw_version', 'CMSSW_7_6_1') self.sw_arch = cdict.get('default_sw_scramarch', 'slc6_amd64_gcc493') # LogDB holder centralurl = cdict.get("central_logdb_url", "") identifier = cdict.get("log_reporter", "reqmgr2") self.logdb = LogDB(centralurl, identifier) # local team cache which will request data from wmstats base, uri = self.reqmgr_url.split('://') base_url = '%s://%s' % (base, uri.split('/')[0]) self.wmstatsurl = cdict.get('wmstats_url', '%s/wmstatsserver' % base_url) if not self.wmstatsurl: raise Exception('ReqMgr2 configuration file does not provide wmstats url') self.team_cache = [] # fetch assignment arguments specification from StdBase self.assignArgs = StdBase().getWorkloadAssignArgs() self.assignArgs = {key: val['default'] for key, val in self.assignArgs.items()} def getTeams(self): "Helper function to get teams from wmstats or local cache" teams = self.team_cache url = '%s/data/teams' % self.wmstatsurl params = {} headers = {'Accept': 'application/json'} try: data = getdata(url, params, headers) if 'error' in data: print("WARNING: fail to get teams from %s" % url) print(data) teams = 
data.get('result', []) self.team_cache = teams except Exception as exp: print("WARNING: fail to get teams from %s" % url) print(str(exp)) return teams def update_scripts(self, force=False): "Update scripts dict" if force or abs(time.time() - self.sdict['ts']) > self.sdict_thr: for item in os.listdir(self.sdir): with open(os.path.join(self.sdir, item), 'r') as istream: self.sdict[item.split('.')[0]] = istream.read() self.sdict['ts'] = time.time() def abs_page(self, tmpl, content): """generate abstract page""" menu = self.templatepage('menu', menus=menus(), tmpl=tmpl) body = self.templatepage('generic', menu=menu, content=content) page = self.templatepage('main', content=body, user=user()) return page def page(self, content): """ Provide page wrapped with top/bottom templates. """ return self.templatepage('main', content=content) def error(self, content): "Generate common error page" content = self.templatepage('error', content=content) return self.abs_page('error', content) @expose def index(self): """Main page""" content = self.templatepage('index', requests=ACTIVE_STATUS, rdict=REQUEST_STATE_TRANSITION) return self.abs_page('main', content) @expose def home(self, **kwds): """Main page""" return self.index(**kwds) ### Request actions ### @expose @checkargs(['status', 'sort']) def assign(self, **kwds): """assign page""" if not kwds: kwds = {} if 'status' not in kwds: kwds.update({'status': 'assignment-approved'}) docs = [] attrs = ['RequestName', 'RequestDate', 'Group', 'Requestor', 'RequestStatus'] dataResult = self.reqmgr.getRequestByStatus(statusList=[kwds['status']]) for data in dataResult: for val in data.values(): docs.append(request_attr(val, attrs)) sortby = kwds.get('sort', 'status') docs = [r for r in sort(docs, sortby)] assignDict = deepcopy(self.assignArgs) assignDict.update(getPropValueMap()) assignDict['Team'] = self.getTeams() filter_sort = self.templatepage('filter_sort') content = self.templatepage('assign', sort=sortby, filter_sort_table=filter_sort, sites=SITE_CACHE.getData(), site_white_list=site_white_list(), site_black_list=site_black_list(), user=user(), user_dn=user_dn(), requests=toString(docs), misc_table=json2table(assignDict, web_ui_names(), "all_attributes"), misc_json=json2form(assignDict, indent=2, keep_first_value=True)) return self.abs_page('assign', content) @expose @checkargs(['status', 'sort']) def approve(self, **kwds): """ Approve page: get list of request associated with user DN. Fetch their status list from ReqMgr and display if requests were seen by data-ops. 
""" if not kwds: kwds = {} if 'status' not in kwds: kwds.update({'status': 'new'}) kwds.update({'_nostale': True}) docs = [] attrs = ['RequestName', 'RequestDate', 'Group', 'Requestor', 'RequestStatus', 'Campaign'] dataResult = self.reqmgr.getRequestByStatus(statusList=[kwds['status']]) for data in dataResult: for val in data.values(): docs.append(request_attr(val, attrs)) sortby = kwds.get('sort', 'status') docs = [r for r in sort(docs, sortby)] filter_sort = self.templatepage('filter_sort') content = self.templatepage('approve', requests=toString(docs), date=tstamp(), sort=sortby, filter_sort_table=filter_sort, gen_color=gen_color) return self.abs_page('approve', content) @expose def create(self, **kwds): """create page""" # get list of standard specs from WMCore and new ones from local area # loc_specs_dir = os.path.join(self.spdir, 'Specs') # local specs # loc_specs = spec_list(loc_specs_dir, 'Specs') # all_specs = list(set(self.std_specs + loc_specs)) # all_specs.sort() all_specs = list(self.std_specs) spec = kwds.get('form', '') if not spec: spec = self.std_specs[0] # make spec first in all_specs list if spec in all_specs: all_specs.remove(spec) all_specs = [spec] + all_specs jsondata = get_request_template_from_type(spec) # create templatized page out of provided forms self.update_scripts() content = self.templatepage('create', table=json2table(jsondata, web_ui_names(), jsondata), jsondata=json2form(jsondata, indent=2, keep_first_value=True), name=spec, scripts=[s for s in self.sdict.keys() if s != 'ts'], specs=all_specs) return self.abs_page('create', content) def generate_objs(self, script, jsondict): """Generate objects from givem JSON template""" self.update_scripts() code = self.sdict.get(script, '') if code.find('def genobjs(jsondict)') == -1: return self.error( "Improper python snippet, your code should start with <b>def genobjs(jsondict)</b> function") exec (code) # code snippet must starts with genobjs function return [r for r in genobjs(jsondict)] @expose def config(self, name): "Fetch config for given request name" result = self.reqmgr.getConfig(name) if len(result) == 1: result = result[0] else: result = 'Configuration not found for: %s' % name return result.replace('\n', '<br/>') @expose def fetch(self, rid): "Fetch document for given id" rid = rid.replace('request-', '') doc = self.reqmgr.getRequestByNames(rid) transitions = [] tst = time.time() # get request tasks tasks = self.reqmgr.getRequestTasks(rid) if len(doc) == 1: try: doc = doc[0][rid] except: pass name = doc.get('RequestName', 'NA') title = 'Request %s' % name status = doc.get('RequestStatus', '') transitions = REQUEST_STATE_TRANSITION.get(status, []) if status in transitions: transitions.remove(status) visible_attrs = get_modifiable_properties(status) filterout_attrs = get_protected_properties() # extend filterout list with "RequestStatus" since it is passed separately filterout_attrs.append("RequestStatus") for key, val in self.assignArgs.items(): if not doc.get(key): doc[key] = val if visible_attrs == "all_attributes": filteredDoc = doc for prop in filterout_attrs: if prop in filteredDoc: del filteredDoc[prop] else: filteredDoc = {} for prop in visible_attrs: filteredDoc[prop] = doc.get(prop, "") propValueMap = getPropValueMap() propValueMap['Team'] = self.getTeams() selected = {} for prop in propValueMap: if prop in filteredDoc: filteredDoc[prop], selected[prop] = reorder_list(propValueMap[prop], filteredDoc[prop]) content = self.templatepage('doc', title=title, status=status, name=name, rid=rid, 
tasks=json2form(tasks, indent=2, keep_first_value=False), table=json2table(filteredDoc, web_ui_names(), visible_attrs, selected), jsondata=json2form(doc, indent=2, keep_first_value=False), doc=json.dumps(doc), time=time, tasksConfigs=tasks_configs(doc, html=True), sTransition=state_transition(doc), pTransition=priority_transition(doc), transitions=transitions, humanStates=REQUEST_HUMAN_STATES, ts=tst, user=user(), userdn=user_dn()) elif len(doc) > 1: jsondata = [pprint.pformat(d) for d in doc] content = self.templatepage('doc', title='Series of docs: %s' % rid, table="", jsondata=jsondata, time=time, tasksConfigs=tasks_configs(doc, html=True), sTransition=state_transition(doc), pTransition=priority_transition(doc), transitions=transitions, humanStates=REQUEST_HUMAN_STATES, ts=tst, user=user(), userdn=user_dn()) else: doc = 'No request found for name=%s' % rid return self.abs_page('request', content) @expose def record2logdb(self, **kwds): """LogDB submission page""" print(kwds) request = kwds['request'] msg = kwds['message'] self.logdb.post(request, msg) msg = '<h6>Confirmation</h6>Your request has been entered to LogDB.' return self.abs_page('generic', msg) @expose def requests(self, **kwds): """Page showing requests""" if not kwds: kwds = {} if 'status' not in kwds: kwds.update({'status': 'acquired'}) dataResult = self.reqmgr.getRequestByStatus(kwds['status']) attrs = ['RequestName', 'RequestDate', 'Group', 'Requestor', 'RequestStatus', 'Campaign'] docs = [] for data in dataResult: for doc in data.values(): docs.append(request_attr(doc, attrs)) sortby = kwds.get('sort', 'status') docs = [r for r in sort(docs, sortby)] filter_sort = self.templatepage('filter_sort') content = self.templatepage('requests', requests=toString(docs), sort=sortby, status=kwds['status'], filter_sort_table=filter_sort) return self.abs_page('requests', content) @expose def request(self, **kwargs): "Get data example and expose it as json" dataset = kwargs.get('uinput', '') if not dataset: return {'error': 'no input dataset'} url = 'https://cmsweb.cern.ch/reqmgr2/data/request?outputdataset=%s' % dataset params = {} headers = {'Accept': 'application/json'} wdata = getdata(url, params, headers) wdict = dict(date=time.ctime(), team='Team-A', status='Running', ID=genid(wdata)) winfo = self.templatepage('workflow', wdict=wdict, dataset=dataset, code=pprint.pformat(wdata)) content = self.templatepage('search', content=winfo) return self.abs_page('request', content) @expose def batch(self, **kwds): """batch page""" # TODO: we need a template for batch attributes # and read it from separate area, like DASMaps name = kwds.get('name', '') batch = {} if name: # batch = self.reqmgr.getBatchesByName(name) batch = {'Name': 'Batch1', 'Description': 'Bla-bla', 'Creator': 'valya', 'Group': 'test', 'Workflows': ['workflow1', 'workflow2'], 'Attributes': {'HeavyIon': ['true', 'false']}} attributes = batch.get('Attributes', {}) workflows = batch.get('Workflows', []) description = batch.get('Description', '') creator = batch.get('Creator', user_dn()) content = self.templatepage('batch', name=name, attributes=json2table(attributes, web_ui_names()), workflows=workflows, creator=creator, description=description) return self.abs_page('batch', content) @expose def batches(self, **kwds): """Page showing batches""" if not kwds: kwds = {} if 'name' not in kwds: kwds.update({'name': ''}) sortby = kwds.get('sort', 'name') # results = self.reqmgr.getBatchesByName(kwds['name']) results = [ {'Name': 'Batch1', 'Description': 'Bla-bla', 'Creator': 
'valya', 'Group': 'test', 'Workflows': ['workflow1', 'workflow2'], 'Date': 'Fri Feb 13 10:36:41 EST 2015', 'Attributes': {'HeavyIon': ['true', 'false']}}, {'Name': 'Batch2', 'Description': 'lksdjflksjdf', 'Creator': 'valya', 'Group': 'test', 'Workflows': ['workflow1', 'workflow2'], 'Date': 'Fri Feb 10 10:36:41 EST 2015', 'Attributes': {'HeavyIon': ['true', 'false']}}, ] docs = [r for r in sort(results, sortby)] filter_sort = self.templatepage('filter_sort') content = self.templatepage('batches', batches=docs, sort=sortby, filter_sort_table=filter_sort) return self.abs_page('batches', content) ### Aux methods ### @expose def put_request(self, **kwds): "PUT request callback to reqmgr server, should be used in AJAX" reqname = kwds.get('RequestName', '') status = kwds.get('RequestStatus', '') if not reqname: msg = 'Unable to update request status, empty request name' raise cherrypy.HTTPError(406, msg) if not status: msg = 'Unable to update request status, empty status value' raise cherrypy.HTTPError(406, msg) return self.reqmgr.updateRequestStatus(reqname, status) @expose def images(self, *args): """ Serve static images. """ args = list(args) check_scripts(args, self.imgmap, self.imgdir) mime_types = ['*/*', 'image/gif', 'image/png', 'image/jpg', 'image/jpeg'] accepts = cherrypy.request.headers.elements('Accept') for accept in accepts: if accept.value in mime_types and len(args) == 1 \ and args[0] in self.imgmap: image = self.imgmap[args[0]] # use image extension to pass correct content type ctype = 'image/%s' % image.split('.')[-1] cherrypy.response.headers['Content-type'] = ctype return serve_file(image, content_type=ctype) def serve(self, kwds, imap, idir, datatype='', minimize=False): "Serve files for high level APIs (yui/css/js)" args = [] for key, val in kwds.items(): if key == 'f': # we only look-up files from given kwds dict if isinstance(val, list): args += val else: args.append(val) scripts = check_scripts(args, imap, idir) return self.serve_files(args, scripts, imap, datatype, minimize) @exposecss @tools.gzip() def css(self, **kwargs): """ Serve provided CSS files. They can be passed as f=file1.css&f=file2.css """ resource = kwargs.get('resource', 'css') if resource == 'css': return self.serve(kwargs, self.cssmap, self.cssdir, 'css', True) @exposejs @tools.gzip() def js(self, **kwargs): """ Serve provided JS scripts. They can be passed as f=file1.js&f=file2.js with optional resource parameter to speficy type of JS files, e.g. resource=yui. """ resource = kwargs.get('resource', 'js') if resource == 'js': return self.serve(kwargs, self.jsmap, self.jsdir) def serve_files(self, args, scripts, resource, datatype='', minimize=False): """ Return asked set of files for JS, YUI, CSS. """ idx = "-".join(scripts) if idx not in self._cache.keys(): data = '' if datatype == 'css': data = '@CHARSET "UTF-8";' for script in args: path = os.path.join(sys.path[0], resource[script]) path = os.path.normpath(path) with open(path) as ifile: data = "\n".join([data, ifile.read().replace('@CHARSET "UTF-8";', '')]) if datatype == 'css': set_headers("text/css") if minimize: self._cache[idx] = minify(data) else: self._cache[idx] = data return self._cache[idx]
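The ReqMgrService constructor above only creates the LogDB holder; the record2logdb() handler later posts operator messages through it. A minimal sketch of exercising that same holder outside CherryPy, assuming a reachable CouchDB endpoint; the URL, identifier and request name below are placeholders, not values taken from a real reqmgr2 configuration:

from WMCore.Services.LogDB.LogDB import LogDB

central_logdb_url = 'https://cmsweb.cern.ch/couchdb/wmstats_logdb'  # assumed endpoint
logdb = LogDB(central_logdb_url, 'reqmgr2')

# post() and get() mirror what record2logdb() does for a single request
logdb.post('pdmvserv_task_EXO-Example_00001', 'manual operator note', 'info')
docs = logdb.get('pdmvserv_task_EXO-Example_00001')
print(docs)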
def __init__(self, rest, config):
    super(LogDBTasks, self).__init__(config)
    self.logdb = LogDB(config.central_logdb_url, config.log_reporter)
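A sketch of the minimal configuration this LogDBTasks constructor expects: only central_logdb_url and log_reporter are read from it. SimpleNamespace stands in for the WMCore ConfigSection normally passed in; the attribute values and the commented-out instantiation are illustrative assumptions.

from types import SimpleNamespace

config = SimpleNamespace(
    central_logdb_url='https://cmsweb.cern.ch/couchdb/wmstats_logdb',  # assumed URL
    log_reporter='reqmgr2')

# tasks = LogDBTasks(rest, config)   # 'rest' is whatever REST application object the caller owns
# tasks.logdb.post('some_request_name', 'periodic task heartbeat', 'info')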
class MSRuleCleaner(MSCore): """ MSRuleCleaner.py class provides the logic used to clean the Rucio block level data placement rules created by WMAgent. """ def __init__(self, msConfig, logger=None): """ Runs the basic setup and initialization for the MSRuleCleaner module :param msConfig: micro service configuration """ super(MSRuleCleaner, self).__init__(msConfig, logger=logger) self.msConfig.setdefault("verbose", True) self.msConfig.setdefault("interval", 60) self.msConfig.setdefault("services", ['ruleCleaner']) self.msConfig.setdefault("rucioWmaAccount", "wma_test") self.msConfig.setdefault("rucioMStrAccount", "wmcore_transferor") self.msConfig.setdefault('enableRealMode', False) self.mode = "RealMode" if self.msConfig['enableRealMode'] else "DryRunMode" self.curlMgr = RequestHandler() self.targetStatusRegex = re.compile(r'.*archived') self.logDB = LogDB(self.msConfig["logDBUrl"], self.msConfig["logDBReporter"], logger=self.logger) self.wmstatsSvc = WMStatsServer(self.msConfig['wmstatsUrl'], logger=self.logger) # Building all the Pipelines: pName = 'plineMSTrCont' self.plineMSTrCont = Pipeline(name=pName, funcLine=[Functor(self.setPlineMarker, pName), Functor(self.setParentDatasets), Functor(self.getRucioRules, 'container', self.msConfig['rucioMStrAccount']), Functor(self.cleanRucioRules)]) pName = 'plineMSTrBlock' self.plineMSTrBlock = Pipeline(name=pName, funcLine=[Functor(self.setPlineMarker, pName), Functor(self.setParentDatasets), Functor(self.getRucioRules, 'block', self.msConfig['rucioMStrAccount']), Functor(self.cleanRucioRules)]) pName = 'plineAgentCont' self.plineAgentCont = Pipeline(name=pName, funcLine=[Functor(self.setPlineMarker, pName), Functor(self.getRucioRules, 'container', self.msConfig['rucioWmaAccount']), Functor(self.cleanRucioRules)]) pName = 'plineAgentBlock' self.plineAgentBlock = Pipeline(name=pName, funcLine=[Functor(self.setPlineMarker, pName), Functor(self.getRucioRules, 'block', self.msConfig['rucioWmaAccount']), Functor(self.cleanRucioRules)]) pName = 'plineArchive' self.plineArchive = Pipeline(name=pName, funcLine=[Functor(self.setPlineMarker, pName), Functor(self.findTargetStatus), Functor(self.setClean), Functor(self.setArchivalDelayExpired), Functor(self.setLogDBClean), Functor(self.archive)]) # Building the different set of plines we will need later: # NOTE: The following are all the functional pipelines which are supposed to include # a cleanup function and report cleanup status in the MSRuleCleanerWflow object self.cleanuplines = [self.plineMSTrCont, self.plineMSTrBlock, self.plineAgentCont, self.plineAgentBlock] # Building an auxiliary list of cleanup pipeline names only: self.cleanupPipeNames = [pline.name for pline in self.cleanuplines] # Building lists of pipelines related only to Agents or MStransferror self.agentlines = [self.plineAgentCont, self.plineAgentBlock] self.mstrlines = [self.plineMSTrCont, self.plineMSTrBlock] # Initialization of the 'cleaned' and 'archived' counters: self.wfCounters = {'cleaned': {}, 'archived': {'normalArchived': 0, 'forceArchived': 0}} self.globalLocks = set() def getGlobalLocks(self): """ Fetches the list of 'globalLocks' from wmstats server and the list of 'parentLocks' from request manager. Stores/updates the unified set in the 'globalLocks' instance variable. Returns the resultant unified set. 
:return: A union set of the 'globalLocks' and the 'parentLocks' lists """ self.logger.info("Fetching globalLocks list from wmstats server.") try: globalLocks = set(self.wmstatsSvc.getGlobalLocks()) except Exception as ex: msg = "Failed to refresh global locks list for the current polling cycle. Error: %s " msg += "Skipping this polling cycle." self.logger.error(msg, str(ex)) raise ex self.logger.info("Fetching parentLocks list from reqmgr2 server.") try: parentLocks = set(self.reqmgr2.getParentLocks()) except Exception as ex: msg = "Failed to refresh parent locks list for the current poling cycle. Error: %s " msg += "Skipping this polling cycle." self.logger.error(msg, str(ex)) raise ex self.globalLocks = globalLocks | parentLocks def resetCounters(self): """ A simple function for zeroing the cleaned and archived counters. """ for pline in self.cleanuplines: self.wfCounters['cleaned'][pline.name] = 0 self.wfCounters['archived']['normalArchived'] = 0 self.wfCounters['archived']['forceArchived'] = 0 def execute(self, reqStatus): """ Executes the whole ruleCleaner logic :return: summary """ # start threads in MSManager which should call this method summary = dict(RULECLEANER_REPORT) self.currThread = current_thread() self.currThreadIdent = self.currThread.name self.updateReportDict(summary, "thread_id", self.currThreadIdent) self.resetCounters() self.logger.info("MSRuleCleaner is running in mode: %s.", self.mode) # Build the list of workflows to work on: try: requestRecords = {} for status in reqStatus: requestRecords.update(self.getRequestRecords(status)) except Exception as err: # general error msg = "Unknown exception while fetching requests from ReqMgr2. Error: %s", str(err) self.logger.exception(msg) self.updateReportDict(summary, "error", msg) # Call _execute() and feed the relevant pipeline with the objects popped from requestRecords try: self.getGlobalLocks() totalNumRequests, cleanNumRequests, normalArchivedNumRequests, forceArchivedNumRequests = self._execute(requestRecords) msg = "\nNumber of processed workflows: %s." msg += "\nNumber of properly cleaned workflows: %s." msg += "\nNumber of normally archived workflows: %s." msg += "\nNumber of force archived workflows: %s." self.logger.info(msg, totalNumRequests, cleanNumRequests, normalArchivedNumRequests, forceArchivedNumRequests) self.updateReportDict(summary, "total_num_requests", totalNumRequests) self.updateReportDict(summary, "clean_num_requests", cleanNumRequests) self.updateReportDict(summary, "normal_archived_num_requests", normalArchivedNumRequests) self.updateReportDict(summary, "force_archived_num_requests", forceArchivedNumRequests) except Exception as ex: msg = "Unknown exception while running MSRuleCleaner thread Error: %s" self.logger.exception(msg, str(ex)) self.updateReportDict(summary, "error", msg) return summary def _execute(self, reqRecords): """ Executes the MSRuleCleaner pipelines based on the workflow status :param reqList: A list of RequestRecords to work on :return: a tuple with: number of properly cleaned requests number of processed workflows number of archived workflows """ # NOTE: The Input Cleanup, the Block Level Cleanup and the Archival # Pipelines are executed sequentially in the above order. 
# This way we assure ourselves that we archive only workflows # that have accomplished the needed cleanup cleanNumRequests = 0 totalNumRequests = 0 # Call the workflow dispatcher: for req in viewvalues(reqRecords): wflow = MSRuleCleanerWflow(req) self._dispatchWflow(wflow) msg = "\n----------------------------------------------------------" msg += "\nMSRuleCleanerWflow: %s" msg += "\n----------------------------------------------------------" self.logger.debug(msg, pformat(wflow)) totalNumRequests += 1 if self._checkClean(wflow): cleanNumRequests += 1 # Report the counters: for pline in self.cleanuplines: msg = "Workflows cleaned by pipeline: %s: %d" self.logger.info(msg, pline.name, self.wfCounters['cleaned'][pline.name]) normalArchivedNumRequests = self.wfCounters['archived']['normalArchived'] forceArchivedNumRequests = self.wfCounters['archived']['forceArchived'] self.logger.info("Workflows normally archived: %d", self.wfCounters['archived']['normalArchived']) self.logger.info("Workflows force archived: %d", self.wfCounters['archived']['forceArchived']) return totalNumRequests, cleanNumRequests, normalArchivedNumRequests, forceArchivedNumRequests def _dispatchWflow(self, wflow): """ A function intended to dispatch a workflow (e.g based on its status) through one or more functional pipelines in case there is some more complicated logic involved in the order we execute them but not just a sequentially """ self.logger.debug("Dispatching workflow: %s", wflow['RequestName']) # NOTE: The following dispatch logic is a subject to be changed at any time # Resolve: # NOTE: First resolve any preliminary flags that will be needed further # in the logic of the _dispatcher() itself if wflow['RequestStatus'] == 'announced': self.getMSOutputTransferInfo(wflow) # Clean: # Do not clean any Resubmission, but still let them be archived if wflow['RequestType'] == 'Resubmission': wflow['ForceArchive'] = True msg = "Skipping cleanup step for workflow: %s - RequestType is %s." msg += " Will try to archive it directly." self.logger.info(msg, wflow['RequestName'], wflow['RequestType']) elif wflow['RequestStatus'] in ['rejected', 'aborted-completed']: # NOTE: We do not check the ParentageResolved flag for these # workflows, but we do need to clean output data placement # rules from the agents for them for pline in self.agentlines: try: pline.run(wflow) except Exception as ex: msg = "%s: General error from pipeline. Workflow: %s. Error: \n%s. " msg += "\nWill retry again in the next cycle." self.logger.exception(msg, pline.name, wflow['RequestName'], str(ex)) continue if wflow['CleanupStatus'][pline.name]: self.wfCounters['cleaned'][pline.name] += 1 elif wflow['RequestStatus'] == 'announced' and not wflow['ParentageResolved']: # NOTE: We skip workflows which are not having 'ParentageResolved' # flag, but we still need some proper logging for them. msg = "Skipping workflow: %s - 'ParentageResolved' flag set to false." msg += " Will retry again in the next cycle." self.logger.info(msg, wflow['RequestName']) elif wflow['RequestStatus'] == 'announced' and not wflow['TransferDone']: # NOTE: We skip workflows which have not yet finalised their TransferStatus # in MSOutput, but we still need some proper logging for them. msg = "Skipping workflow: %s - 'TransferStatus' is 'pending' or 'TransferInfo' is missing in MSOutput." msg += " Will retry again in the next cycle." 
self.logger.info(msg, wflow['RequestName']) elif wflow['RequestStatus'] == 'announced' and not wflow['TransferTape']: # NOTE: We skip workflows which have not yet finalised their tape transfers. # (i.e. even if a single output which is supposed to be covered # by a tape rule is in any of the following transient states: # {REPLICATING, STUCK, SUSPENDED, WAITING_APPROVAL}.) # We still need some proper logging for them. msg = "Skipping workflow: %s - tape transfers are not yet completed." msg += " Will retry again in the next cycle." self.logger.info(msg, wflow['RequestName']) elif wflow['RequestStatus'] == 'announced': for pline in self.cleanuplines: try: pline.run(wflow) except MSRuleCleanerResolveParentError as ex: msg = "%s: Parentage Resolve Error: %s. " msg += "Will retry again in the next cycle." self.logger.error(msg, pline.name, str(ex)) continue except Exception as ex: msg = "%s: General error from pipeline. Workflow: %s. Error: \n%s. " msg += "\nWill retry again in the next cycle." self.logger.exception(msg, pline.name, wflow['RequestName'], str(ex)) continue if wflow['CleanupStatus'][pline.name]: self.wfCounters['cleaned'][pline.name] += 1 else: # We shouldn't be here: msg = "Skipping workflow: %s - " msg += "Does not fall under any of the defined categories." self.logger.error(msg, wflow['RequestName']) # Archive: try: self.plineArchive.run(wflow) if wflow['ForceArchive']: self.wfCounters['archived']['forceArchived'] += 1 else: self.wfCounters['archived']['normalArchived'] += 1 except MSRuleCleanerArchivalSkip as ex: msg = "%s: Proper conditions not met: %s. " msg += "Skipping archival in the current cycle." self.logger.info(msg, wflow['PlineMarkers'][-1], str(ex)) except MSRuleCleanerArchivalError as ex: msg = "%s: Archival Error: %s. " msg += "Will retry again in the next cycle." self.logger.error(msg, wflow['PlineMarkers'][-1], str(ex)) except Exception as ex: msg = "%s General error from pipeline. Workflow: %s. Error: \n%s. " msg += "\nWill retry again in the next cycle." self.logger.exception(msg, wflow['PlineMarkers'][-1], wflow['RequestName'], str(ex)) def setPlineMarker(self, wflow, pName): """ A function intended to mark which is the pipeline currently working on the workflow. It is supposed to be called always as a first function in the pipeline. :param wflow: A MSRuleCleaner workflow representation :param pName: The name of the functional pipeline :return: The workflow object """ # NOTE: The current functional pipeline MUST always be appended at the # end of the 'PlineMarkers' list # First get rid of the default: if not wflow['PlineMarkers']: wflow['PlineMarkers'] = [] # Then push our current value into the markers list: wflow['PlineMarkers'].append(pName) # Populate the list of flags to be used later: if pName not in wflow['RulesToClean']: if pName in self.cleanupPipeNames: wflow['RulesToClean'][pName] = [] if pName not in wflow['CleanupStatus']: if pName in self.cleanupPipeNames: wflow['CleanupStatus'][pName] = False return wflow def _checkClean(self, wflow): """ An auxiliary function used to only check the temporary cleanup status. 
It basically takes the pipelines registered in 'PlineMarkers' that have already worked on the workflow as a mask and applies this mask over the set of flags in the 'CleanupStatus' field and then reduces the result to a single bool value """ # NOTE: This is one of the few functions taking a workflow as an argument # but returning a bool, since it is an auxiliary function and is not # supposed to be called as a standalone function in a pipeline. # NOTE: `all([]) == True`, ergo all the 'rejected' && 'aborted-completed' workflows # are also counted as properly cleaned and can trigger archival later # Build a list of bool flags based on the mask of PlineMarkers cleanFlagsList = [wflow['CleanupStatus'][key] for key in wflow['PlineMarkers'] if key in wflow['CleanupStatus']] # If no one have worked on the workflow set the clean status to false if not wflow['PlineMarkers']: cleanStatus = False # If we have a mask longer than the list of flags avoid false positives # because of the behavior explained above - `all([]) == True` elif not cleanFlagsList: cleanStatus = False # Figure out the final value else: cleanStatus = all(cleanFlagsList) return cleanStatus def setClean(self, wflow): """ A function to set the 'IsClean' flag based on the status from all the pipelines which have worked on the workflow (and have put their markers in the 'PlineMarkers' list) :param wflow: A MSRuleCleaner workflow representation :return: The workflow object """ wflow['IsClean'] = self._checkClean(wflow) return wflow def _checkLogDBClean(self, wflow): """ An auxiliary function used to only check the LogDB cleanup status. It makes a query to LogDB in order to verify there are no any records for the current workflow :param wflow: A MSRuleCleaner workflow representation :return: True if no records were found in LogDB about wflow """ cleanStatus = False logDBRecords = self.logDB.get(wflow['RequestName']) self.logger.debug("logDBRecords: %s", pformat(logDBRecords)) if not logDBRecords: cleanStatus = True return cleanStatus def setLogDBClean(self, wflow): """ A function to set the 'IsLogDBClean' flag based on the presence of any records in LogDB for the current workflow. :param wflow: A MSRuleCleaner workflow representation :return: The workflow object """ wflow['IsLogDBClean'] = self._checkLogDBClean(wflow) if not wflow['IsLogDBClean'] and wflow['IsArchivalDelayExpired']: wflow['IsLogDBClean'] = self._cleanLogDB(wflow) return wflow def _cleanLogDB(self, wflow): """ A function to be used for cleaning all the records related to a workflow in logDB. 
:param wflow: A MSRuleCleaner workflow representation :return: True if NO errors were encountered while deleting records from LogDB """ cleanStatus = False try: if self.msConfig['enableRealMode']: self.logger.info("Deleting %s records from LogDB WMStats...", wflow['RequestName']) res = self.logDB.delete(wflow['RequestName'], agent=False) if res == 'delete-error': msg = "Failed to delete logDB docs for wflow: %s" % wflow['RequestName'] raise MSRuleCleanerArchivalError(msg) cleanStatus = True else: self.logger.info("DRY-RUN: NOT Deleting %s records from LogDB WMStats...", wflow['RequestName']) except Exception as ex: msg = "General Exception while cleaning LogDB records for wflow: %s : %s" self.logger.exception(msg, wflow['RequestName'], str(ex)) return cleanStatus def findTargetStatus(self, wflow): """ Find the proper targeted archival status :param wflow: A MSRuleCleaner workflow representation :return: The workflow object """ # Check the available status transitions before we decide the final status targetStatusList = RequestStatus.REQUEST_STATE_TRANSITION.get(wflow['RequestStatus'], []) for status in targetStatusList: if self.targetStatusRegex.match(status): wflow['TargetStatus'] = status self.logger.debug("TargetStatus: %s", wflow['TargetStatus']) return wflow def _checkArchDelayExpired(self, wflow): """ A function to check Archival Expiration Delay based on the information returned by WMStatsServer regarding the time of the last request status transition :param wflow: MSRuleCleaner workflow representation :return: True if the archival delay have been expired """ archDelayExpired = False currentTime = int(time.time()) threshold = self.msConfig['archiveDelayHours'] * 3600 try: lastTransitionTime = wflow['RequestTransition'][-1]['UpdateTime'] if lastTransitionTime and (currentTime - lastTransitionTime) > threshold: archDelayExpired = True except KeyError: self.logger.debug("Could not find status transition history for %s", wflow['RequestName']) return archDelayExpired def setArchivalDelayExpired(self, wflow): """ A function to set the 'IsArchivalDelayExpired' flag """ wflow['IsArchivalDelayExpired'] = self._checkArchDelayExpired(wflow) return wflow def archive(self, wflow): """ Move the workflow to the proper archived status after checking the full cleanup status :param wflow: A MSRuleCleaner workflow representation :return: The workflow object """ # Make all the needed checks before trying to archive if not (wflow['IsClean'] or wflow['ForceArchive']): msg = "Not properly cleaned workflow: %s" % wflow['RequestName'] raise MSRuleCleanerArchivalSkip(msg) if not wflow['TargetStatus']: msg = "Could not determine which archival status to target for workflow: %s" % wflow['RequestName'] raise MSRuleCleanerArchivalError(msg) if not wflow['IsLogDBClean']: msg = "LogDB records have not been cleaned for workflow: %s" % wflow['RequestName'] raise MSRuleCleanerArchivalSkip(msg) if not wflow['IsArchivalDelayExpired']: msg = "Archival delay period has not yet expired for workflow: %s." % wflow['RequestName'] raise MSRuleCleanerArchivalSkip(msg) if not self.msConfig['enableRealMode']: msg = "Real Run Mode not enabled." 
raise MSRuleCleanerArchivalSkip(msg) # Proceed with the actual archival: try: self.reqmgr2.updateRequestStatus(wflow['RequestName'], wflow['TargetStatus']) msg = "Successful status transition to: %s for workflow: %s" self.logger.info(msg, wflow['TargetStatus'], wflow['RequestName']) except Exception as ex: msg = "General Exception while trying status transition to: %s " % wflow['TargetStatus'] msg += "for workflow: %s : %s" % (wflow['RequestName'], str(ex)) raise MSRuleCleanerArchivalError(msg) return wflow def getMSOutputTransferInfo(self, wflow): """ Fetches the transfer information from the MSOutput REST interface for the given workflow. :param wflow: A MSRuleCleaner workflow representation :return: The workflow object """ headers = {'Accept': 'application/json'} params = {} url = '%s/data/info?request=%s' % (self.msConfig['msOutputUrl'], wflow['RequestName']) try: res = self.curlMgr.getdata(url, params=params, headers=headers, ckey=ckey(), cert=cert()) data = json.loads(res)['result'][0] transferInfo = data['transferDoc'] except Exception as ex: msg = "General exception while fetching TransferInfo from MSOutput for %s. " msg += "Error: %s" self.logger.exception(msg, wflow['RequestName'], str(ex)) # Set Transfer status - information fetched from MSOutput only if transferInfo is not None and transferInfo['TransferStatus'] == 'done': wflow['TransferDone'] = True # Set Tape rules status - information fetched from Rucio (tape rule ids from MSOutput) if transferInfo is not None and transferInfo['OutputMap']: tapeRulesStatusList = [] # For setting 'TransferTape' = True we require either no tape rules for the # workflow have been created or all existing tape rules to be in status 'OK', # so every empty TapeRuleID we consider as completed. for mapRecord in transferInfo['OutputMap']: if not mapRecord['TapeRuleID']: continue rucioRule = self.rucio.getRule(mapRecord['TapeRuleID']) if not rucioRule: tapeRulesStatusList.append(False) msg = "Tape rule: %s not found for workflow: %s " msg += "Possible server side error." self.logger.error(msg, mapRecord['TapeRuleID'], wflow['RequestName']) continue if rucioRule['state'] == 'OK': tapeRulesStatusList.append(True) msg = "Tape rule: %s in final state: %s for workflow: %s" self.logger.info(msg, mapRecord['TapeRuleID'], rucioRule['state'], wflow['RequestName']) else: tapeRulesStatusList.append(False) msg = "Tape rule: %s in non final state: %s for workflow: %s" self.logger.info(msg, mapRecord['TapeRuleID'], rucioRule['state'], wflow['RequestName']) if all(tapeRulesStatusList): wflow['TransferTape'] = True return wflow def setParentDatasets(self, wflow): """ Used to resolve parent datasets for a workflow. 
:param wflow: A MSRuleCleaner workflow representation :return: The workflow object """ if wflow['InputDataset'] and wflow['IncludeParents']: childDataset = wflow['InputDataset'] parentDataset = findParent([childDataset], self.msConfig['dbsUrl']) # NOTE: If findParent() returned None then the DBS service failed to # resolve the request (it is considered an ERROR outside WMCore) if parentDataset.get(childDataset, None) is None: msg = "Failed to resolve parent dataset for: %s in workflow: %s" % (childDataset, wflow['RequestName']) raise MSRuleCleanerResolveParentError(msg) elif parentDataset: wflow['ParentDataset'] = [parentDataset[childDataset]] msg = "Found parent %s for input dataset %s in workflow: %s " self.logger.info(msg, parentDataset, wflow['InputDataset'], wflow['RequestName']) else: msg = "Could not find parent for input dataset: %s in workflows: %s" self.logger.error(msg, wflow['InputDataset'], wflow['RequestName']) return wflow def getRucioRules(self, wflow, gran, rucioAcct): """ Queries Rucio and builds the relevant list of blocklevel rules for the given workflow :param wflow: A MSRuleCleaner workflow representation :param gran: Data granularity to search for Rucio rules. Possible values: 'block' or 'container' :return: The workflow object """ currPline = wflow['PlineMarkers'][-1] # Create the container list to the rucio account map and set the checkGlobalLocks flag. mapRuleType = {self.msConfig['rucioWmaAccount']: ["OutputDatasets"], self.msConfig['rucioMStrAccount']: ["InputDataset", "MCPileup", "DataPileup", "ParentDataset"]} if rucioAcct == self.msConfig['rucioMStrAccount']: checkGlobalLocks = True else: checkGlobalLocks = False # Find all the data placement rules created by the components: for dataType in mapRuleType[rucioAcct]: dataList = wflow[dataType] if isinstance(wflow[dataType], list) else [wflow[dataType]] for dataCont in dataList: if dataCont is None: continue self.logger.debug("getRucioRules: dataCont: %s", pformat(dataCont)) if checkGlobalLocks and dataCont in self.globalLocks: msg = "Found dataset: %s in GlobalLocks. NOT considering it for filling the " msg += "RulesToClean list for both container and block level Rules for workflow: %s!" self.logger.info(msg, dataCont, wflow['RequestName']) continue if gran == 'container': for rule in self.rucio.listDataRules(dataCont, account=rucioAcct): wflow['RulesToClean'][currPline].append(rule['id']) msg = "Found %s container-level rule to be deleted for container %s" self.logger.info(msg, rule['id'], dataCont) elif gran == 'block': try: blocks = self.rucio.getBlocksInContainer(dataCont) for block in blocks: for rule in self.rucio.listDataRules(block, account=rucioAcct): wflow['RulesToClean'][currPline].append(rule['id']) msg = "Found %s block-level rule to be deleted for container %s" self.logger.info(msg, rule['id'], dataCont) except WMRucioDIDNotFoundException: msg = "Container: %s not found in Rucio for workflow: %s." self.logger.info(msg, dataCont, wflow['RequestName']) return wflow def cleanRucioRules(self, wflow): """ Cleans all the Rules present in the field 'RulesToClean' in the MSRuleCleaner workflow representation. And fills the relevant Cleanup Status. :param wflow: A MSRuleCleaner workflow representation :return: The workflow object """ # NOTE: The function should be called independently and sequentially from # The Input and the respective BlockLevel pipelines. 
# NOTE: The current functional pipeline is always the last one in the PlineMarkers list currPline = wflow['PlineMarkers'][-1] delResults = [] if self.msConfig['enableRealMode']: for rule in wflow['RulesToClean'][currPline]: self.logger.info("%s: Deleting ruleId: %s ", currPline, rule) delResult = self.rucio.deleteRule(rule) delResults.append(delResult) if not delResult: self.logger.warning("%s: Failed to delete ruleId: %s ", currPline, rule) else: for rule in wflow['RulesToClean'][currPline]: delResults.append(True) self.logger.info("%s: DRY-RUN: Is about to delete ruleId: %s ", currPline, rule) # Set the cleanup flag: wflow['CleanupStatus'][currPline] = all(delResults) return wflow def getRequestRecords(self, reqStatus): """ Queries ReqMgr2 for requests in a given status. :param reqStatus: The status for the requests to be fetched from ReqMgr2 :return requests: A dictionary with all the workflows in the given status """ self.logger.info("Fetching requests in status: %s", reqStatus) result = self.reqmgr2.getRequestByStatus([reqStatus], detail=True) if not result: requests = {} else: requests = result[0] self.logger.info(' retrieved %s requests in status: %s', len(requests), reqStatus) return requests
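The MSRuleCleaner constructor builds its pipelines out of Pipeline and Functor objects: every step receives the workflow object, mutates it, and returns it so the next step can run. The toy classes below reproduce only that calling convention for illustration; they are not the WMCore Pipeline/Functor implementation, and the step functions are made-up stand-ins for setPlineMarker-style steps.

class Functor(object):
    """Bind a function and its extra positional arguments for later use on a workflow."""
    def __init__(self, func, *args):
        self.func = func
        self.args = args

    def __call__(self, wflow):
        return self.func(wflow, *self.args)


class Pipeline(object):
    """Run a list of Functors over a workflow object, in order."""
    def __init__(self, name, funcLine):
        self.name = name
        self.funcLine = funcLine

    def run(self, wflow):
        for step in self.funcLine:
            wflow = step(wflow)
        return wflow


def setMarker(wflow, pName):
    """Record which pipeline touched the workflow, as setPlineMarker() does above."""
    wflow.setdefault('PlineMarkers', []).append(pName)
    return wflow


def markClean(wflow):
    """Flag the last registered pipeline as cleaned; purely illustrative."""
    wflow['CleanupStatus'] = {wflow['PlineMarkers'][-1]: True}
    return wflow


pline = Pipeline(name='plineToy', funcLine=[Functor(setMarker, 'plineToy'), Functor(markClean)])
print(pline.run({'RequestName': 'toy_workflow'}))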
class WorkQueueReqMgrInterface(object): """Helper class for ReqMgr interaction""" def __init__(self, **kwargs): if not kwargs.get('logger'): import logging kwargs['logger'] = logging self.logger = kwargs['logger'] # this will break all in one test self.reqMgr2 = ReqMgr(kwargs.get("reqmgr2_endpoint", None)) centralurl = kwargs.get("central_logdb_url", "") identifier = kwargs.get("log_reporter", "") # set the thread name before creat the log db. # only sets that when it is not set already myThread = threading.currentThread() if myThread.getName() == "MainThread": myThread.setName(self.__class__.__name__) self.logdb = LogDB(centralurl, identifier, logger=self.logger) self.previous_state = {} def __call__(self, queue): """Synchronize WorkQueue and RequestManager""" msg = '' try: # pull in new work self.logger.info("queueing new work") work = self.queueNewRequests(queue) msg += "New Work: %d\n" % work except Exception as ex: errorMsg = "Error caught during RequestManager pull" self.logger.exception("%s: %s", errorMsg, str(ex)) try: # get additional open-running work self.logger.info("adding new element to open requests") extraWork = self.addNewElementsToOpenRequests(queue) msg += "Work added: %d\n" % extraWork except Exception as ex: errorMsg = "Error caught during RequestManager split" self.logger.exception("%s: %s", errorMsg, str(ex)) try: # report back to ReqMgr self.logger.info("cancel aborted requests") count = self.cancelWork(queue) self.logger.info("finised canceling requests") msg += "Work canceled: %s " % count except Exception as ex: errorMsg = "Error caught during canceling the request" self.logger.exception("%s: %s", errorMsg, str(ex)) queue.backend.recordTaskActivity('reqmgr_sync', msg) def queueNewRequests(self, queue): """Get requests from regMgr and queue to workqueue""" self.logger.info("Contacting Request manager for more work") work = 0 workLoads = [] try: workLoads = self.getAvailableRequests() except Exception as ex: traceMsg = traceback.format_exc() msg = "Error contacting RequestManager: %s" % traceMsg self.logger.warning(msg) return 0 for team, reqName, workLoadUrl in workLoads: try: try: Lexicon.couchurl(workLoadUrl) except Exception as ex: # can throw many errors e.g. AttributeError, AssertionError etc. # check its not a local file if not os.path.exists(workLoadUrl): error = WorkQueueWMSpecError( None, "Workflow url validation error: %s" % str(ex)) raise error self.logger.info("Processing request %s at %s" % (reqName, workLoadUrl)) units = queue.queueWork(workLoadUrl, request=reqName, team=team) self.logdb.delete(reqName, "error", this_thread=True, agent=False) except TERMINAL_EXCEPTIONS as ex: # fatal error - report back to ReqMgr self.logger.error( 'Permanent failure processing request "%s": %s' % (reqName, str(ex))) self.logger.info("Marking request %s as failed in ReqMgr" % reqName) self.reportRequestStatus(reqName, 'Failed', message=str(ex)) continue except (IOError, socket.error, CouchError, CouchConnectionError) as ex: # temporary problem - try again later msg = 'Error processing request "%s": will try again later.' % reqName msg += '\nError: "%s"' % str(ex) self.logger.info(msg) self.logdb.post(reqName, msg, 'error') continue except Exception as ex: # Log exception as it isnt a communication problem msg = 'Error processing request "%s": will try again later.' 
% reqName msg += '\nSee log for details.\nError: "%s"' % str(ex) self.logger.exception('Unknown error processing %s' % reqName) self.logdb.post(reqName, msg, 'error') continue self.logger.info('%s units(s) queued for "%s"' % (units, reqName)) work += units self.logger.info("%s element(s) obtained from RequestManager" % work) return work def cancelWork(self, queue): requests = self.reqMgr2.getRequestByStatus( ['aborted', 'force-complete'], detail=False) count = 0 for req in requests: try: queue.cancelWork(req) count += 1 except Exception as ex: msg = 'Error to cancel the request "%s": %s' % (req, str(ex)) self.logger.exception(msg) return count def report(self, queue): """Report queue status to ReqMgr.""" new_state = {} uptodate_elements = [] now = time.time() elements = queue.statusInbox(dictKey="RequestName") if not elements: return new_state for ele in elements: ele = elements[ele][0] # 1 element tuple try: request = self.reqMgr2.getRequestByNames(ele['RequestName']) if not request: msg = 'Failed to get request "%s" from ReqMgr2. Will try again later.' % ele[ 'RequestName'] self.logger.warning(msg) continue request = request[0][ele['RequestName']] if request['RequestStatus'] in ('failed', 'completed', 'announced', 'closed-out', 'rejected'): # requests can be done in reqmgr but running in workqueue # if request has been closed but agent cleanup actions # haven't been run (or agent has been retired) # Prune out obviously too old ones to avoid build up if queue.params.get('reqmgrCompleteGraceTime', -1) > 0: if (now - float(ele.updatetime) ) > queue.params['reqmgrCompleteGraceTime']: # have to check all elements are at least running and are old enough request_elements = queue.statusInbox( WorkflowName=request['RequestName']) if not any([ x for x in request_elements if x['Status'] != 'Running' and not x.inEndState() ]): last_update = max([ float(x.updatetime) for x in request_elements ]) if ( now - last_update ) > queue.params['reqmgrCompleteGraceTime']: self.logger.info( "Finishing request %s as it is done in reqmgr" % request['RequestName']) queue.doneWork( WorkflowName=request['RequestName']) continue else: pass # assume workqueue status will catch up later elif request['RequestStatus'] in ['aborted', 'force-complete']: queue.cancelWork(WorkflowName=request['RequestName']) # Check consistency of running-open/closed and the element closure status elif request['RequestStatus'] == 'running-open' and not ele.get( 'OpenForNewData', False): self.reportRequestStatus(ele['RequestName'], 'running-closed') elif request['RequestStatus'] == 'running-closed' and ele.get( 'OpenForNewData', False): queue.closeWork(ele['RequestName']) # we do not want to move the request to 'failed' status elif ele['Status'] == 'Failed': continue elif ele['Status'] not in self._reqMgrToWorkQueueStatus( request['RequestStatus']): self.reportElement(ele) uptodate_elements.append(ele) except Exception as ex: msg = 'Error talking to ReqMgr about request "%s": %s' % ( ele['RequestName'], str(ex)) self.logger.exception(msg) return uptodate_elements def deleteFinishedWork(self, queue, elements): """Delete work from queue that is finished in ReqMgr""" finished = [] for element in elements: if element.inEndState(): finished.append(element['RequestName']) return queue.deleteWorkflows(*finished) def getAvailableRequests(self): """ Get available requests and sort by team and priority returns [(team, request_name, request_spec_url)] """ tempResults = self.reqMgr2.getRequestByStatus("staged") filteredResults = [] for requests in 
tempResults: for request in requests.values(): filteredResults.append(request) filteredResults.sort(key=itemgetter('RequestPriority'), reverse=True) filteredResults.sort(key=lambda r: r["Team"]) results = [(x["Team"], x["RequestName"], x["RequestWorkflow"]) for x in filteredResults] return results def reportRequestStatus(self, request, status, message=None): """Change state in RequestManager Optionally, take a message to append to the request """ if message: logType = "error" if status == "Failed" else "info" self.logdb.post(request, str(message), logType) reqmgrStatus = self._workQueueToReqMgrStatus(status) if reqmgrStatus: # only send known states try: self.reqMgr2.updateRequestStatus(request, reqmgrStatus) except Exception as ex: msg = "%s : fail to update status will try later: %s" % ( request, str(ex)) msg += traceback.format_exc() self.logdb.post(request, msg, 'warning') raise ex return def _workQueueToReqMgrStatus(self, status): """Map WorkQueue Status to that reported to ReqMgr""" statusMapping = { 'Acquired': 'acquired', 'Running': 'running-open', 'Failed': 'failed', 'Canceled': 'aborted', 'CancelRequested': 'aborted', 'Done': 'completed' } if status in statusMapping: # if wq status passed convert to reqmgr status return statusMapping[status] elif status in REQUEST_STATE_LIST: # if reqmgr status passed return reqmgr status return status else: # unknown status return None def _reqMgrToWorkQueueStatus(self, status): """Map ReqMgr status to that in a WorkQueue element, it is not a 1-1 relation""" statusMapping = { 'acquired': ['Acquired'], 'running': ['Running'], 'running-open': ['Running'], 'running-closed': ['Running'], 'failed': ['Failed'], 'aborted': ['Canceled', 'CancelRequested'], 'force-complete': ['Canceled', 'CancelRequested'], 'completed': ['Done'] } if status in statusMapping: return statusMapping[status] else: return [] def reportElement(self, element): """Report element to ReqMgr""" self.reportRequestStatus(element['RequestName'], element['Status']) def addNewElementsToOpenRequests(self, queue): """Add new elements to open requests which are in running-open state, only works adding new blocks from the input dataset""" self.logger.info( "Checking Request Manager for open requests and closing old ones") work = 0 requests = [] try: requests = self.reqMgr2.getRequestByStatus("running-open", detail=False) except Exception as ex: traceMsg = traceback.format_exc() msg = "Error contacting RequestManager: %s" % traceMsg self.logger.warning(msg) return 0 for reqName in requests: try: self.logger.info("Processing request %s" % (reqName)) units = queue.addWork(requestName=reqName) self.logdb.delete(reqName, 'error', True, agent=False) except (WorkQueueWMSpecError, WorkQueueNoWorkError) as ex: # fatal error - but at least it was split the first time. Log and skip. msg = 'Error adding further work to request "%s". Will try again later' % reqName msg += '\nError: "%s"' % str(ex) self.logger.info(msg) self.logdb.post(reqName, msg, 'error') continue except (IOError, socket.error, CouchError, CouchConnectionError) as ex: # temporary problem - try again later msg = 'Error processing request "%s": will try again later.' % reqName msg += '\nError: "%s"' % str(ex) self.logger.info(msg) self.logdb.post(reqName, msg, 'error') continue except Exception as ex: # Log exception as it isnt a communication problem msg = 'Error processing request "%s": will try again later.' 
% reqName msg += '\nSee log for details.\nError: "%s"' % str(ex) traceMsg = traceback.format_exc() msg = "%s\n%s" % (msg, traceMsg) self.logger.exception('Unknown error processing %s' % reqName) self.logdb.post(reqName, msg, 'error') continue self.logger.info('%s units(s) queued for "%s"' % (units, reqName)) work += units self.logger.info("%s element(s) added to open requests" % work) return work
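Both WorkQueueReqMgrInterface variants rely on _workQueueToReqMgrStatus() to decide which states are safe to report back to ReqMgr. A standalone sketch of that mapping logic follows; the dictionary is copied from the method above, while the REQUEST_STATE_LIST membership test is simplified to a hard-coded subset for illustration:

WQ_TO_REQMGR = {'Acquired': 'acquired',
                'Running': 'running-open',
                'Failed': 'failed',
                'Canceled': 'aborted',
                'CancelRequested': 'aborted',
                'Done': 'completed'}

def toReqMgrStatus(status, reqmgrStates=('acquired', 'running-open', 'running-closed',
                                         'failed', 'aborted', 'completed')):
    """Map a WorkQueue status to a ReqMgr status, pass known ReqMgr states through, else None."""
    if status in WQ_TO_REQMGR:
        return WQ_TO_REQMGR[status]
    if status in reqmgrStates:
        return status
    return None

assert toReqMgrStatus('Done') == 'completed'
assert toReqMgrStatus('running-open') == 'running-open'
assert toReqMgrStatus('bogus') is None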
class WorkQueueReqMgrInterface(object): """Helper class for ReqMgr interaction""" def __init__(self, **kwargs): if not kwargs.get('logger'): import logging kwargs['logger'] = logging self.logger = kwargs['logger'] # this will break all in one test self.reqMgr2 = ReqMgr(kwargs.get("reqmgr2_endpoint", None)) centralurl = kwargs.get("central_logdb_url", "") identifier = kwargs.get("log_reporter", "") # set the thread name before creat the log db. # only sets that when it is not set already myThread = threading.currentThread() if myThread.getName() == "MainThread": myThread.setName(self.__class__.__name__) self.logdb = LogDB(centralurl, identifier, logger=self.logger) self.previous_state = {} def __call__(self, queue): """Synchronize WorkQueue and RequestManager""" msg = '' try: # pull in new work work = self.queueNewRequests(queue) msg += "New Work: %d\n" % work except Exception: self.logger.exception("Error caught during RequestManager pull") try: # get additional open-running work extraWork = self.addNewElementsToOpenRequests(queue) msg += "Work added: %d\n" % extraWork except Exception: self.logger.exception("Error caught during RequestManager split") try: # report back to ReqMgr uptodate_elements = self.report(queue) msg += "Updated ReqMgr status for: %s\n" % ", ".join([x['RequestName'] for x in uptodate_elements]) except Exception: self.logger.exception("Error caught during RequestManager update") else: try: # Delete finished requests from WorkQueue self.deleteFinishedWork(queue, uptodate_elements) except Exception: self.logger.exception("Error caught during work deletion") queue.backend.recordTaskActivity('reqmgr_sync', msg) def queueNewRequests(self, queue): """Get requests from regMgr and queue to workqueue""" self.logger.info("Contacting Request manager for more work") work = 0 workLoads = [] if queue.params['DrainMode']: self.logger.info('Draining queue: Skip requesting work from ReqMgr') return 0 try: workLoads = self.getAvailableRequests() except Exception as ex: traceMsg = traceback.format_exc() msg = "Error contacting RequestManager: %s" % traceMsg self.logger.warning(msg) return 0 for team, reqName, workLoadUrl in workLoads: try: try: Lexicon.couchurl(workLoadUrl) except Exception as ex: # can throw many errors e.g. AttributeError, AssertionError etc. # check its not a local file if not os.path.exists(workLoadUrl): error = WorkQueueWMSpecError(None, "Workflow url validation error: %s" % str(ex)) raise error self.logger.info("Processing request %s at %s" % (reqName, workLoadUrl)) units = queue.queueWork(workLoadUrl, request=reqName, team=team) self.logdb.delete(reqName, "error", this_thread=True) except TERMINAL_EXCEPTIONS as ex: # fatal error - report back to ReqMgr self.logger.error('Permanent failure processing request "%s": %s' % (reqName, str(ex))) self.logger.info("Marking request %s as failed in ReqMgr" % reqName) self.reportRequestStatus(reqName, 'Failed', message=str(ex)) continue except (IOError, socket.error, CouchError, CouchConnectionError) as ex: # temporary problem - try again later msg = 'Error processing request "%s": will try again later.' % reqName msg += '\nError: "%s"' % str(ex) self.logger.info(msg) self.logdb.post(reqName, msg, 'error') continue except Exception as ex: # Log exception as it isnt a communication problem msg = 'Error processing request "%s": will try again later.' 
% reqName msg += '\nSee log for details.\nError: "%s"' % str(ex) self.logger.exception('Unknown error processing %s' % reqName) self.logdb.post(reqName, msg, 'error') continue try: self.reportRequestStatus(reqName, "acquired") except Exception as ex: self.logger.warning("Unable to update ReqMgr state: %s" % str(ex)) self.logger.warning('Will try again later') self.logger.info('%s units(s) queued for "%s"' % (units, reqName)) work += units self.logger.info("%s element(s) obtained from RequestManager" % work) return work def report(self, queue): """Report queue status to ReqMgr.""" new_state = {} uptodate_elements = [] now = time.time() elements = queue.statusInbox(dictKey="RequestName") if not elements: return new_state for ele in elements: ele = elements[ele][0] # 1 element tuple try: request = self.reqMgr2.getRequestByNames(ele['RequestName']) if not request: msg = 'Failed to get request "%s" from ReqMgr2. Will try again later.' % ele['RequestName'] self.logger.warning(msg) continue request = request[0][ele['RequestName']] if request['RequestStatus'] in ('failed', 'completed', 'announced', 'epic-FAILED', 'closed-out', 'rejected'): # requests can be done in reqmgr but running in workqueue # if request has been closed but agent cleanup actions # haven't been run (or agent has been retired) # Prune out obviously too old ones to avoid build up if queue.params.get('reqmgrCompleteGraceTime', -1) > 0: if (now - float(ele.updatetime)) > queue.params['reqmgrCompleteGraceTime']: # have to check all elements are at least running and are old enough request_elements = queue.statusInbox(WorkflowName=request['RequestName']) if not any( [x for x in request_elements if x['Status'] != 'Running' and not x.inEndState()]): last_update = max([float(x.updatetime) for x in request_elements]) if (now - last_update) > queue.params['reqmgrCompleteGraceTime']: self.logger.info( "Finishing request %s as it is done in reqmgr" % request['RequestName']) queue.doneWork(WorkflowName=request['RequestName']) continue else: pass # assume workqueue status will catch up later elif request['RequestStatus'] in ['aborted', 'force-complete']: queue.cancelWork(WorkflowName=request['RequestName']) # Check consistency of running-open/closed and the element closure status elif request['RequestStatus'] == 'running-open' and not ele.get('OpenForNewData', False): self.reportRequestStatus(ele['RequestName'], 'running-closed') elif request['RequestStatus'] == 'running-closed' and ele.get('OpenForNewData', False): queue.closeWork(ele['RequestName']) # we do not want to move the request to 'failed' status elif ele['Status'] == 'Failed': continue elif ele['Status'] not in self._reqMgrToWorkQueueStatus(request['RequestStatus']): self.reportElement(ele) uptodate_elements.append(ele) except Exception as ex: msg = 'Error talking to ReqMgr about request "%s": %s' % (ele['RequestName'], str(ex)) self.logger.exception(msg) return uptodate_elements def deleteFinishedWork(self, queue, elements): """Delete work from queue that is finished in ReqMgr""" finished = [] for element in elements: if element.inEndState() and self._workQueueToReqMgrStatus(element['Status']) in ('aborted', 'failed', 'completed', 'announced', 'epic-FAILED', 'closed-out', 'rejected'): finished.append(element['RequestName']) return queue.deleteWorkflows(*finished) def getAvailableRequests(self): """ Get available requests and sort by team and priority returns [(team, request_name, request_spec_url)] """ tempResults = self.reqMgr2.getRequestByStatus("assigned") filteredResults = [] 
        for requests in tempResults:
            for request in requests.values():
                filteredResults.append(request)
        filteredResults.sort(key=itemgetter('RequestPriority'), reverse=True)
        filteredResults.sort(key=lambda r: r["Teams"][0])

        results = [(x["Teams"][0], x["RequestName"], x["RequestWorkflow"]) for x in filteredResults]

        return results

    def reportRequestStatus(self, request, status, message=None):
        """Change state in RequestManager.
           Optionally, take a message to append to the request.
        """
        if message:
            self.logdb.post(request, str(message), 'info')
        reqmgrStatus = self._workQueueToReqMgrStatus(status)

        if reqmgrStatus:  # only send known states
            try:
                self.reqMgr2.updateRequestStatus(request, reqmgrStatus)
            except Exception as ex:
                msg = "%s : fail to update status will try later: %s" % (request, str(ex))
                msg += traceback.format_exc()
                self.logdb.post(request, msg, 'warning')
                raise ex
        return

    def _workQueueToReqMgrStatus(self, status):
        """Map WorkQueue Status to that reported to ReqMgr"""
        statusMapping = {'Acquired': 'acquired',
                         'Running': 'running-open',
                         'Failed': 'failed',
                         'Canceled': 'aborted',
                         'CancelRequested': 'aborted',
                         'Done': 'completed'
                         }
        if status in statusMapping:
            # if wq status passed convert to reqmgr status
            return statusMapping[status]
        elif status in REQUEST_STATE_LIST:
            # if reqmgr status passed return reqmgr status
            return status
        else:
            # unknown status
            return None

    def _reqMgrToWorkQueueStatus(self, status):
        """Map ReqMgr status to that in a WorkQueue element, it is not a 1-1 relation"""
        statusMapping = {'acquired': ['Acquired'],
                         'running': ['Running'],
                         'running-open': ['Running'],
                         'running-closed': ['Running'],
                         'failed': ['Failed'],
                         'aborted': ['Canceled', 'CancelRequested'],
                         'force-complete': ['Canceled', 'CancelRequested'],
                         'completed': ['Done']}
        if status in statusMapping:
            return statusMapping[status]
        else:
            return []

    def reportElement(self, element):
        """Report element to ReqMgr"""
        self.reportRequestStatus(element['RequestName'], element['Status'])

    def addNewElementsToOpenRequests(self, queue):
        """Add new elements to open requests which are in running-open state,
           only works adding new blocks from the input dataset."""
        self.logger.info("Checking Request Manager for open requests and closing old ones")

        # First close any open inbox element which hasn't found anything new in a while
        queue.closeWork()
        self.report(queue)

        work = 0
        requests = []

        # Drain mode, don't pull any work into open requests. They will be closed if the queue stays in drain long enough
        if queue.params['DrainMode']:
            self.logger.info('Draining queue: Skip requesting work from ReqMgr')
            return 0

        try:
            requests = self.reqMgr2.getRequestByStatus("running-open", detail=False)
        except Exception as ex:
            traceMsg = traceback.format_exc()
            msg = "Error contacting RequestManager: %s" % traceMsg
            self.logger.warning(msg)
            return 0

        for reqName in requests:
            try:
                self.logger.info("Processing request %s" % (reqName))
                units = queue.addWork(requestName=reqName)
                self.logdb.delete(reqName, 'error', True)
            except (WorkQueueWMSpecError, WorkQueueNoWorkError) as ex:
                # fatal error - but at least it was split the first time. Log and skip.
                msg = 'Error adding further work to request "%s". Will try again later' % reqName
                msg += '\nError: "%s"' % str(ex)
                self.logger.info(msg)
                self.logdb.post(reqName, msg, 'error')
                continue
            except (IOError, socket.error, CouchError, CouchConnectionError) as ex:
                # temporary problem - try again later
                msg = 'Error processing request "%s": will try again later.' % reqName
                msg += '\nError: "%s"' % str(ex)
                self.logger.info(msg)
                self.logdb.post(reqName, msg, 'error')
                continue
            except Exception as ex:
                # Log exception as it isn't a communication problem
                msg = 'Error processing request "%s": will try again later.' % reqName
                msg += '\nSee log for details.\nError: "%s"' % str(ex)
                traceMsg = traceback.format_exc()
                msg = "%s\n%s" % (msg, traceMsg)
                self.logger.exception('Unknown error processing %s' % reqName)
                self.logdb.post(reqName, msg, 'error')
                continue

            self.logger.info('%s unit(s) queued for "%s"' % (units, reqName))
            work += units

        self.logger.info("%s element(s) added to open requests" % work)
        return work
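# Illustrative sketch (not WMCore code): the two mapping helpers above are
# intentionally asymmetric -- WorkQueue -> ReqMgr is one-to-one, while
# ReqMgr -> WorkQueue returns the list of element states that are acceptable
# for a given request state. The stand-alone snippet below reproduces that
# shape with local dictionaries so the consistency check used in report()
# ("is this element status compatible with the request status?") can be
# exercised without a queue or ReqMgr. All names here are hypothetical.

WQ_TO_REQMGR = {'Acquired': 'acquired', 'Running': 'running-open',
                'Failed': 'failed', 'Canceled': 'aborted',
                'CancelRequested': 'aborted', 'Done': 'completed'}

REQMGR_TO_WQ = {'acquired': ['Acquired'], 'running': ['Running'],
                'running-open': ['Running'], 'running-closed': ['Running'],
                'failed': ['Failed'],
                'aborted': ['Canceled', 'CancelRequested'],
                'force-complete': ['Canceled', 'CancelRequested'],
                'completed': ['Done']}


def element_needs_update(element_status, request_status):
    """Return True when the WorkQueue element status should be pushed to ReqMgr."""
    # Unknown request states map to an empty list, so any element status is
    # treated as out of date and gets reported (same behaviour as report()).
    return element_status not in REQMGR_TO_WQ.get(request_status, [])


if __name__ == '__main__':
    # 'Running' is compatible with 'running-open', so nothing to report
    assert not element_needs_update('Running', 'running-open')
    # 'Done' is not an acceptable element state for an 'acquired' request
    assert element_needs_update('Done', 'acquired')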
class WorkQueueReqMgrInterface(): """Helper class for ReqMgr interaction""" def __init__(self, **kwargs): if not kwargs.get('logger'): import logging kwargs['logger'] = logging self.logger = kwargs['logger'] #TODO: (reqmgr2Only - remove this line when reqmgr is replaced) self.reqMgr = RequestManager(kwargs) #this will break all in one test self.reqMgr2 = ReqMgr(kwargs.get("reqmgr2_endpoint", None)) centralurl = kwargs.get("central_logdb_url", "") identifier = kwargs.get("log_reporter", "") # set the thread name before creat the log db. # only sets that when it is not set already myThread = threading.currentThread() if myThread.getName() == "MainThread": myThread.setName(self.__class__.__name__) self.logdb = LogDB(centralurl, identifier, logger=self.logger) self.previous_state = {} def __call__(self, queue): """Synchronize WorkQueue and RequestManager""" msg = '' try: # pull in new work work = self.queueNewRequests(queue) msg += "New Work: %d\n" % work except Exception: self.logger.exception("Error caught during RequestManager pull") try: # get additional open-running work extraWork = self.addNewElementsToOpenRequests(queue) msg += "Work added: %d\n" % extraWork except Exception: self.logger.exception("Error caught during RequestManager split") try: # report back to ReqMgr uptodate_elements = self.report(queue) msg += "Updated ReqMgr status for: %s\n" % ", ".join( [x['RequestName'] for x in uptodate_elements]) except Exception: self.logger.exception("Error caught during RequestManager update") else: try: # Delete finished requests from WorkQueue self.deleteFinishedWork(queue, uptodate_elements) except Exception: self.logger.exception("Error caught during work deletion") queue.backend.recordTaskActivity('reqmgr_sync', msg) def queueNewRequests(self, queue): """Get requests from regMgr and queue to workqueue""" self.logger.info("Contacting Request manager for more work") work = 0 workLoads = [] if queue.params['DrainMode']: self.logger.info( 'Draining queue: Skip requesting work from ReqMgr') return 0 try: workLoads = self.getAvailableRequests(queue.params['Teams']) except Exception as ex: traceMsg = traceback.format_exc() msg = "Error contacting RequestManager: %s" % traceMsg self.logger.warning(msg) return 0 for team, reqName, workLoadUrl in workLoads: # try: # self.reportRequestStatus(reqName, "negotiating") # except Exception, ex: # self.logger.error(""" # Unable to update ReqMgr state to negotiating: %s # Ignoring this request: %s""" % (str(ex), reqName)) # continue try: try: Lexicon.couchurl(workLoadUrl) except Exception as ex: # can throw many errors e.g. AttributeError, AssertionError etc. # check its not a local file if not os.path.exists(workLoadUrl): error = WorkQueueWMSpecError( None, "Workflow url validation error: %s" % str(ex)) raise error self.logger.info("Processing request %s at %s" % (reqName, workLoadUrl)) units = queue.queueWork(workLoadUrl, request=reqName, team=team) self.logdb.delete(reqName, "error", this_thread=True) except (WorkQueueWMSpecError, WorkQueueNoWorkError) as ex: # fatal error - report back to ReqMgr self.logger.info( 'Permanent failure processing request "%s": %s' % (reqName, str(ex))) self.logger.info("Marking request %s as failed in ReqMgr" % reqName) self.reportRequestStatus(reqName, 'Failed', message=str(ex)) continue except (IOError, socket.error, CouchError, CouchConnectionError) as ex: # temporary problem - try again later msg = 'Error processing request "%s": will try again later.' 
\ '\nError: "%s"' % (reqName, str(ex)) self.logger.info(msg) self.logdb.post(reqName, msg, 'error') continue except Exception as ex: # Log exception as it isnt a communication problem msg = 'Error processing request "%s": will try again later.' \ '\nSee log for details.\nError: "%s"' % (reqName, str(ex)) self.logger.exception('Unknown error processing %s' % reqName) self.logdb.post(reqName, msg, 'error') continue try: self.reportRequestStatus(reqName, "acquired") except Exception as ex: self.logger.warning("Unable to update ReqMgr state: %s" % str(ex)) self.logger.warning('Will try again later') self.logger.info('%s units(s) queued for "%s"' % (units, reqName)) work += units self.logger.info("%s element(s) obtained from RequestManager" % work) return work def report(self, queue): """Report queue status to ReqMgr.""" new_state = {} uptodate_elements = [] now = time.time() elements = queue.statusInbox(dictKey="RequestName") if not elements: return new_state for ele in elements: ele = elements[ele][0] # 1 element tuple try: request = self.reqMgr2.getRequestByNames(ele['RequestName']) if not request: msg = 'Failed to get request "%s" from ReqMgr2. Will try again later.' % ele[ 'RequestName'] self.logger.warning(msg) continue request = request[ele['RequestName']] if request['RequestStatus'] in ('failed', 'completed', 'announced', 'epic-FAILED', 'closed-out', 'rejected'): # requests can be done in reqmgr but running in workqueue # if request has been closed but agent cleanup actions # haven't been run (or agent has been retired) # Prune out obviously too old ones to avoid build up if queue.params.get('reqmgrCompleteGraceTime', -1) > 0: if (now - float(ele.updatetime) ) > queue.params['reqmgrCompleteGraceTime']: # have to check all elements are at least running and are old enough request_elements = queue.statusInbox( WorkflowName=request['RequestName']) if not any([ x for x in request_elements if x['Status'] != 'Running' and not x.inEndState() ]): last_update = max([ float(x.updatetime) for x in request_elements ]) if ( now - last_update ) > queue.params['reqmgrCompleteGraceTime']: self.logger.info( "Finishing request %s as it is done in reqmgr" % request['RequestName']) queue.doneWork( WorkflowName=request['RequestName']) continue else: pass # assume workqueue status will catch up later elif request['RequestStatus'] == 'aborted' or request[ 'RequestStatus'] == 'force-complete': queue.cancelWork(WorkflowName=request['RequestName']) # Check consistency of running-open/closed and the element closure status elif request['RequestStatus'] == 'running-open' and not ele.get( 'OpenForNewData', False): self.reportRequestStatus(ele['RequestName'], 'running-closed') elif request['RequestStatus'] == 'running-closed' and ele.get( 'OpenForNewData', False): queue.closeWork(ele['RequestName']) # update request status if necessary elif ele['Status'] not in self._reqMgrToWorkQueueStatus( request['RequestStatus']): self.reportElement(ele) uptodate_elements.append(ele) except Exception as ex: msg = 'Error talking to ReqMgr about request "%s": %s' traceMsg = traceback.format_exc() self.logger.error(msg % (ele['RequestName'], traceMsg)) return uptodate_elements def deleteFinishedWork(self, queue, elements): """Delete work from queue that is finished in ReqMgr""" finished = [] for element in elements: if self._workQueueToReqMgrStatus(element['Status']) in ('aborted', 'failed', 'completed', 'announced', 'epic-FAILED', 'closed-out', 'rejected') \ and element.inEndState(): finished.append(element['RequestName']) return 
queue.deleteWorkflows(*finished) def _getRequestsByTeamsAndStatus(self, status, teams=[]): """ TODO: now it assumes one team per requests - check whether this assumption is correct Check whether we actually use the team for this. Also switch to byteamandstatus couch call instead of """ requests = self.reqMgr2.getRequestByStatus(status) #Then sort by Team name then sort by Priority #https://docs.python.org/2/howto/sorting.html if teams and len(teams) > 0: results = {} for reqName, value in requests.items(): if value["Teams"][0] in teams: results[reqName] = value return results else: return requests def getAvailableRequests(self, teams): """ Get available requests for the given teams and sort by team and priority returns [(team, request_name, request_spec_url)] """ tempResults = self._getRequestsByTeamsAndStatus("assigned", teams).values() filteredResults = [] for request in tempResults: if "Teams" in request and len(request["Teams"]) == 1: filteredResults.append(request) self.logdb.delete(request["RequestName"], "error", this_thread=True) else: msg = "no team or more than one team (%s) are assigined: %s" % ( request.get("Teams", None), request["RequestName"]) self.logger.error(msg) self.logdb.post(request["RequestName"], msg, 'error') filteredResults.sort(key=itemgetter('RequestPriority'), reverse=True) filteredResults.sort(key=lambda r: r["Teams"][0]) results = [(x["Teams"][0], x["RequestName"], x["RequestWorkflow"]) for x in filteredResults] return results def reportRequestStatus(self, request, status, message=None): """Change state in RequestManager Optionally, take a message to append to the request """ if message: self.logdb.post(request, str(message), 'info') reqmgrStatus = self._workQueueToReqMgrStatus(status) if reqmgrStatus: # only send known states try: #TODO: try reqmgr1 call if it fails (reqmgr2Only - remove this line when reqmgr is replaced) self.reqMgr.reportRequestStatus(request, reqmgrStatus) # And replace with this (remove all Exceptins) #self.reqMgr2.updateRequestStatus(request, reqmgrStatus) except HTTPException as ex: # If we get an HTTPException of 404 means reqmgr2 request if ex.status == 404: # try reqmgr2 call msg = "%s : reqmgr2 request: %s" % (request, str(ex)) self.logdb.post(request, msg, 'info') self.reqMgr2.updateRequestStatus(request, reqmgrStatus) else: msg = "%s : fail to update status with HTTP error: %s" % ( request, str(ex)) self.logdb.post(request, msg, 'warning') raise ex except Exception as ex: msg = "%s : fail to update status will try later: %s" % ( request, str(ex)) self.logdb.post(request, msg, 'warning') raise ex def markAcquired(self, request, url=None): """Mark request acquired""" self.reqMgr.putWorkQueue(request, url) def _workQueueToReqMgrStatus(self, status): """Map WorkQueue Status to that reported to ReqMgr""" statusMapping = { 'Acquired': 'acquired', 'Running': 'running-open', 'Failed': 'failed', 'Canceled': 'aborted', 'CancelRequested': 'aborted', 'Done': 'completed' } if status in statusMapping: # if wq status passed convert to reqmgr status return statusMapping[status] elif status in REQUEST_STATE_LIST: # if reqmgr status passed return reqmgr status return status else: # unknown status return None def _reqMgrToWorkQueueStatus(self, status): """Map ReqMgr status to that in a WorkQueue element, it is not a 1-1 relation""" statusMapping = { 'acquired': ['Acquired'], 'running': ['Running'], 'running-open': ['Running'], 'running-closed': ['Running'], 'failed': ['Failed'], 'aborted': ['Canceled', 'CancelRequested'], 'force-complete': 
['Canceled', 'CancelRequested'], 'completed': ['Done'] } if status in statusMapping: return statusMapping[status] else: return [] def reportElement(self, element): """Report element to ReqMgr""" self.reportRequestStatus(element['RequestName'], element['Status']) def addNewElementsToOpenRequests(self, queue): """Add new elements to open requests which are in running-open state, only works adding new blocks from the input dataset""" self.logger.info( "Checking Request Manager for open requests and closing old ones") # First close any open inbox element which hasn't found anything new in a while queue.closeWork() self.report(queue) work = 0 requests = [] # Drain mode, don't pull any work into open requests. They will be closed if the queue stays in drain long enough if queue.params['DrainMode']: self.logger.info( 'Draining queue: Skip requesting work from ReqMgr') return 0 try: requests = self._getRequestsByTeamsAndStatus( "running-open", queue.params['Teams']).keys() except Exception as ex: traceMsg = traceback.format_exc() msg = "Error contacting RequestManager: %s" % traceMsg self.logger.warning(msg) return 0 for reqName in requests: try: self.logger.info("Processing request %s" % (reqName)) units = queue.addWork(requestName=reqName) self.logdb.delete(request["RequestName"], 'error', True) except (WorkQueueWMSpecError, WorkQueueNoWorkError) as ex: # fatal error - but at least it was split the first time. Log and skip. msg = 'Error adding further work to request "%s". Will try again later' \ '\nError: "%s"' % (reqName, str(ex)) self.logger.info(msg) self.logdb.post(reqName, msg, 'error') continue except (IOError, socket.error, CouchError, CouchConnectionError) as ex: # temporary problem - try again later msg = 'Error processing request "%s": will try again later.' \ '\nError: "%s"' % (reqName, str(ex)) self.logger.info(msg) self.logdb.post(reqName, msg, 'error') continue except Exception as ex: # Log exception as it isnt a communication problem msg = 'Error processing request "%s": will try again later.' \ '\nSee log for details.\nError: "%s"' % (reqName, str(ex)) self.logger.exception('Unknown error processing %s' % reqName) self.logdb.post(reqName, msg, 'error') continue self.logger.info('%s units(s) queued for "%s"' % (units, reqName)) work += units self.logger.info("%s element(s) added to open requests" % work) return work
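# Illustrative sketch (not WMCore code): reportRequestStatus() in the class
# above first tries the legacy ReqMgr endpoint and, on an HTTP 404, retries
# against ReqMgr2, assuming the request only exists in the newer service.
# The stand-alone version below captures just that control flow with
# hypothetical stub clients and a stub error type, so the fallback can be
# tested without either service being available.

class StubHTTPError(Exception):
    """Stand-in for an HTTP client exception carrying a status code."""

    def __init__(self, status, msg=''):
        super(StubHTTPError, self).__init__(msg)
        self.status = status


def update_status_with_fallback(legacy_client, reqmgr2_client, request, status):
    """Try the legacy service first, fall back to ReqMgr2 on a 404."""
    try:
        legacy_client.reportRequestStatus(request, status)
    except StubHTTPError as exc:
        if exc.status != 404:
            raise  # genuine failure, let the caller retry later
        reqmgr2_client.updateRequestStatus(request, status)


if __name__ == '__main__':
    class _Legacy(object):
        def reportRequestStatus(self, request, status):
            raise StubHTTPError(404, 'unknown request %s' % request)

    class _ReqMgr2(object):
        def updateRequestStatus(self, request, status):
            print('ReqMgr2: %s -> %s' % (request, status))

    update_status_with_fallback(_Legacy(), _ReqMgr2(), 'test_workflow', 'acquired')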
class ReqMgrService(TemplatedPage): """ Request Manager web service class """ def __init__(self, app, config, mount): self.base = config.base self.rootdir = '/'.join(WMCore.__file__.split('/')[:-1]) if config and not isinstance(config, dict): web_config = config.dictionary_() if not config: web_config = {'base': self.base} TemplatedPage.__init__(self, web_config) imgdir = os.environ.get('RM_IMAGESPATH', os.getcwd() + '/images') self.imgdir = web_config.get('imgdir', imgdir) cssdir = os.environ.get('RM_CSSPATH', os.getcwd() + '/css') self.cssdir = web_config.get('cssdir', cssdir) jsdir = os.environ.get('RM_JSPATH', os.getcwd() + '/js') self.jsdir = web_config.get('jsdir', jsdir) spdir = os.environ.get('RM_SPECPATH', os.getcwd() + '/specs') self.spdir = web_config.get('spdir', spdir) # read scripts area and initialize data-ops scripts self.sdir = os.environ.get('RM_SCRIPTS', os.getcwd() + '/scripts') self.sdir = web_config.get('sdir', self.sdir) self.sdict_thr = web_config.get('sdict_thr', 600) # put reasonable 10 min interval self.sdict = {'ts': time.time()} # placeholder for data-ops scripts self.update_scripts(force=True) # To be filled at run time self.cssmap = {} self.jsmap = {} self.imgmap = {} self.yuimap = {} std_specs_dir = os.path.join(self.rootdir, 'WMSpec/StdSpecs') self.std_specs = spec_list(std_specs_dir) self.std_specs.sort() # Update CherryPy configuration mime_types = ['text/css'] mime_types += [ 'application/javascript', 'text/javascript', 'application/x-javascript', 'text/x-javascript' ] cherryconf.update({ 'tools.encode.on': True, 'tools.gzip.on': True, 'tools.gzip.mime_types': mime_types, }) self._cache = {} # initialize access to reqmgr2 APIs self.reqmgr_url = config.reqmgr.reqmgr2_url self.reqmgr = ReqMgr(self.reqmgr_url) # only gets current view (This might cause to reponse time much longer, # If upto date view is not needed overwrite Fale) self.reqmgr._noStale = True # get fields which we'll use in templates cdict = config.reqmgr.dictionary_() self.couch_url = cdict.get('couch_host', '') self.couch_dbname = cdict.get('couch_reqmgr_db', '') self.couch_wdbname = cdict.get('couch_workload_summary_db', '') self.acdc_url = cdict.get('acdc_host', '') self.acdc_dbname = cdict.get('acdc_db', '') self.configcache_url = cdict.get('couch_config_cache_url', self.couch_url) self.dbs_url = cdict.get('dbs_url', '') self.dqm_url = cdict.get('dqm_url', '') self.sw_ver = cdict.get('default_sw_version', 'CMSSW_7_6_1') self.sw_arch = cdict.get('default_sw_scramarch', 'slc6_amd64_gcc493') # LogDB holder centralurl = cdict.get("central_logdb_url", "") identifier = cdict.get("log_reporter", "reqmgr2") self.logdb = LogDB(centralurl, identifier) # local team cache which will request data from wmstats base, uri = self.reqmgr_url.split('://') base_url = '%s://%s' % (base, uri.split('/')[0]) self.wmstatsurl = cdict.get('wmstats_url', '%s/wmstatsserver' % base_url) if not self.wmstatsurl: raise Exception( 'ReqMgr2 configuration file does not provide wmstats url') self.team_cache = [] # fetch assignment arguments specification from StdBase self.assignArgs = StdBase().getWorkloadAssignArgs() self.assignArgs = { key: val['default'] for key, val in self.assignArgs.items() } def getTeams(self): "Helper function to get teams from wmstats or local cache" teams = self.team_cache url = '%s/data/teams' % self.wmstatsurl params = {} headers = {'Accept': 'application/json'} try: data = getdata(url, params, headers) if 'error' in data: print("WARNING: fail to get teams from %s" % url) print(data) teams = 
data.get('result', []) self.team_cache = teams except Exception as exp: print("WARNING: fail to get teams from %s" % url) print(str(exp)) return teams def update_scripts(self, force=False): "Update scripts dict" if force or abs(time.time() - self.sdict['ts']) > self.sdict_thr: for item in os.listdir(self.sdir): with open(os.path.join(self.sdir, item), 'r') as istream: self.sdict[item.split('.')[0]] = istream.read() self.sdict['ts'] = time.time() def abs_page(self, tmpl, content): """generate abstract page""" menu = self.templatepage('menu', menus=menus(), tmpl=tmpl) body = self.templatepage('generic', menu=menu, content=content) page = self.templatepage('main', content=body, user=user()) return page def page(self, content): """ Provide page wrapped with top/bottom templates. """ return self.templatepage('main', content=content) def error(self, content): "Generate common error page" content = self.templatepage('error', content=content) return self.abs_page('error', content) @expose def index(self): """Main page""" content = self.templatepage('index', requests=ACTIVE_STATUS, rdict=REQUEST_STATE_TRANSITION) return self.abs_page('main', content) @expose def home(self, **kwds): """Main page""" return self.index(**kwds) ### Request actions ### @expose @checkargs(['status', 'sort']) def assign(self, **kwds): """assign page""" if not kwds: kwds = {} if 'status' not in kwds: kwds.update({'status': 'assignment-approved'}) docs = [] attrs = [ 'RequestName', 'RequestDate', 'Group', 'Requestor', 'RequestStatus' ] dataResult = self.reqmgr.getRequestByStatus( statusList=[kwds['status']]) for data in dataResult: for val in data.values(): docs.append(request_attr(val, attrs)) sortby = kwds.get('sort', 'status') docs = [r for r in sort(docs, sortby)] assignDict = deepcopy(self.assignArgs) assignDict.update(getPropValueMap()) assignDict['Team'] = self.getTeams() filter_sort = self.templatepage('filter_sort') content = self.templatepage('assign', sort=sortby, filter_sort_table=filter_sort, sites=SITE_CACHE.getData(), site_white_list=site_white_list(), site_black_list=site_black_list(), user=user(), user_dn=user_dn(), requests=toString(docs), misc_table=json2table( assignDict, web_ui_names(), "all_attributes"), misc_json=json2form(assignDict, indent=2, keep_first_value=True)) return self.abs_page('assign', content) @expose @checkargs(['status', 'sort']) def approve(self, **kwds): """ Approve page: get list of request associated with user DN. Fetch their status list from ReqMgr and display if requests were seen by data-ops. 
""" if not kwds: kwds = {} if 'status' not in kwds: kwds.update({'status': 'new'}) kwds.update({'_nostale': True}) docs = [] attrs = [ 'RequestName', 'RequestDate', 'Group', 'Requestor', 'RequestStatus', 'Campaign' ] dataResult = self.reqmgr.getRequestByStatus( statusList=[kwds['status']]) for data in dataResult: for val in data.values(): docs.append(request_attr(val, attrs)) sortby = kwds.get('sort', 'status') docs = [r for r in sort(docs, sortby)] filter_sort = self.templatepage('filter_sort') content = self.templatepage('approve', requests=toString(docs), date=tstamp(), sort=sortby, filter_sort_table=filter_sort, gen_color=gen_color) return self.abs_page('approve', content) @expose def create(self, **kwds): """create page""" # get list of standard specs from WMCore and new ones from local area # loc_specs_dir = os.path.join(self.spdir, 'Specs') # local specs # loc_specs = spec_list(loc_specs_dir, 'Specs') # all_specs = list(set(self.std_specs + loc_specs)) # all_specs.sort() all_specs = list(self.std_specs) spec = kwds.get('form', '') if not spec: spec = self.std_specs[0] # make spec first in all_specs list if spec in all_specs: all_specs.remove(spec) all_specs = [spec] + all_specs jsondata = get_request_template_from_type(spec) # create templatized page out of provided forms self.update_scripts() content = self.templatepage( 'create', table=json2table(jsondata, web_ui_names(), jsondata), jsondata=json2form(jsondata, indent=2, keep_first_value=True), name=spec, scripts=[s for s in self.sdict.keys() if s != 'ts'], specs=all_specs) return self.abs_page('create', content) def generate_objs(self, script, jsondict): """Generate objects from givem JSON template""" self.update_scripts() code = self.sdict.get(script, '') if code.find('def genobjs(jsondict)') == -1: return self.error( "Improper python snippet, your code should start with <b>def genobjs(jsondict)</b> function" ) exec(code) # code snippet must starts with genobjs function return [r for r in genobjs(jsondict)] @expose def config(self, name): "Fetch config for given request name" result = self.reqmgr.getConfig(name) if len(result) == 1: result = result[0] else: result = 'Configuration not found for: %s' % name return result.replace('\n', '<br/>') @expose def fetch(self, rid): "Fetch document for given id" rid = rid.replace('request-', '') doc = self.reqmgr.getRequestByNames(rid) transitions = [] tst = time.time() # get request tasks tasks = self.reqmgr.getRequestTasks(rid) if len(doc) == 1: try: doc = doc[0][rid] except: pass name = doc.get('RequestName', 'NA') title = 'Request %s' % name status = doc.get('RequestStatus', '') transitions = REQUEST_STATE_TRANSITION.get(status, []) if status in transitions: transitions.remove(status) visible_attrs = get_modifiable_properties(status) filterout_attrs = get_protected_properties() # extend filterout list with "RequestStatus" since it is passed separately filterout_attrs.append("RequestStatus") for key, val in self.assignArgs.items(): if not doc.get(key): doc[key] = val if visible_attrs == "all_attributes": filteredDoc = doc for prop in filterout_attrs: if prop in filteredDoc: del filteredDoc[prop] else: filteredDoc = {} for prop in visible_attrs: filteredDoc[prop] = doc.get(prop, "") propValueMap = getPropValueMap() propValueMap['Team'] = self.getTeams() selected = {} for prop in propValueMap: if prop in filteredDoc: filteredDoc[prop], selected[prop] = reorder_list( propValueMap[prop], filteredDoc[prop]) content = self.templatepage( 'doc', title=title, status=status, name=name, rid=rid, 
tasks=json2form(tasks, indent=2, keep_first_value=False), table=json2table(filteredDoc, web_ui_names(), visible_attrs, selected), jsondata=json2form(doc, indent=2, keep_first_value=False), doc=json.dumps(doc), time=time, tasksConfigs=tasks_configs(doc, html=True), sTransition=state_transition(doc), pTransition=priority_transition(doc), transitions=transitions, ts=tst, user=user(), userdn=user_dn()) elif len(doc) > 1: jsondata = [pprint.pformat(d) for d in doc] content = self.templatepage('doc', title='Series of docs: %s' % rid, table="", jsondata=jsondata, time=time, tasksConfigs=tasks_configs(doc, html=True), sTransition=state_transition(doc), pTransition=priority_transition(doc), transitions=transitions, ts=tst, user=user(), userdn=user_dn()) else: doc = 'No request found for name=%s' % rid return self.abs_page('request', content) @expose def record2logdb(self, **kwds): """LogDB submission page""" print(kwds) request = kwds['request'] msg = kwds['message'] self.logdb.post(request, msg) msg = '<h6>Confirmation</h6>Your request has been entered to LogDB.' return self.abs_page('generic', msg) @expose def requests(self, **kwds): """Page showing requests""" if not kwds: kwds = {} if 'status' not in kwds: kwds.update({'status': 'acquired'}) dataResult = self.reqmgr.getRequestByStatus(kwds['status']) attrs = [ 'RequestName', 'RequestDate', 'Group', 'Requestor', 'RequestStatus', 'Campaign' ] docs = [] for data in dataResult: for doc in data.values(): docs.append(request_attr(doc, attrs)) sortby = kwds.get('sort', 'status') docs = [r for r in sort(docs, sortby)] filter_sort = self.templatepage('filter_sort') content = self.templatepage('requests', requests=toString(docs), sort=sortby, status=kwds['status'], filter_sort_table=filter_sort) return self.abs_page('requests', content) @expose def request(self, **kwargs): "Get data example and expose it as json" dataset = kwargs.get('uinput', '') if not dataset: return {'error': 'no input dataset'} url = 'https://cmsweb.cern.ch/reqmgr2/data/request?outputdataset=%s' % dataset params = {} headers = {'Accept': 'application/json'} wdata = getdata(url, params, headers) wdict = dict(date=time.ctime(), team='Team-A', status='Running', ID=genid(wdata)) winfo = self.templatepage('workflow', wdict=wdict, dataset=dataset, code=pprint.pformat(wdata)) content = self.templatepage('search', content=winfo) return self.abs_page('request', content) @expose def batch(self, **kwds): """batch page""" # TODO: we need a template for batch attributes # and read it from separate area, like DASMaps name = kwds.get('name', '') batch = {} if name: # batch = self.reqmgr.getBatchesByName(name) batch = { 'Name': 'Batch1', 'Description': 'Bla-bla', 'Creator': 'valya', 'Group': 'test', 'Workflows': ['workflow1', 'workflow2'], 'Attributes': { 'HeavyIon': ['true', 'false'] } } attributes = batch.get('Attributes', {}) workflows = batch.get('Workflows', []) description = batch.get('Description', '') creator = batch.get('Creator', user_dn()) content = self.templatepage('batch', name=name, attributes=json2table( attributes, web_ui_names()), workflows=workflows, creator=creator, description=description) return self.abs_page('batch', content) @expose def batches(self, **kwds): """Page showing batches""" if not kwds: kwds = {} if 'name' not in kwds: kwds.update({'name': ''}) sortby = kwds.get('sort', 'name') # results = self.reqmgr.getBatchesByName(kwds['name']) results = [ { 'Name': 'Batch1', 'Description': 'Bla-bla', 'Creator': 'valya', 'Group': 'test', 'Workflows': ['workflow1', 
'workflow2'], 'Date': 'Fri Feb 13 10:36:41 EST 2015', 'Attributes': { 'HeavyIon': ['true', 'false'] } }, { 'Name': 'Batch2', 'Description': 'lksdjflksjdf', 'Creator': 'valya', 'Group': 'test', 'Workflows': ['workflow1', 'workflow2'], 'Date': 'Fri Feb 10 10:36:41 EST 2015', 'Attributes': { 'HeavyIon': ['true', 'false'] } }, ] docs = [r for r in sort(results, sortby)] filter_sort = self.templatepage('filter_sort') content = self.templatepage('batches', batches=docs, sort=sortby, filter_sort_table=filter_sort) return self.abs_page('batches', content) ### Aux methods ### @expose def put_request(self, **kwds): "PUT request callback to reqmgr server, should be used in AJAX" reqname = kwds.get('RequestName', '') status = kwds.get('RequestStatus', '') if not reqname: msg = 'Unable to update request status, empty request name' raise cherrypy.HTTPError(406, msg) if not status: msg = 'Unable to update request status, empty status value' raise cherrypy.HTTPError(406, msg) return self.reqmgr.updateRequestStatus(reqname, status) @expose def images(self, *args): """ Serve static images. """ args = list(args) check_scripts(args, self.imgmap, self.imgdir) mime_types = [ '*/*', 'image/gif', 'image/png', 'image/jpg', 'image/jpeg' ] accepts = cherrypy.request.headers.elements('Accept') for accept in accepts: if accept.value in mime_types and len(args) == 1 \ and args[0] in self.imgmap: image = self.imgmap[args[0]] # use image extension to pass correct content type ctype = 'image/%s' % image.split('.')[-1] cherrypy.response.headers['Content-type'] = ctype return serve_file(image, content_type=ctype) def serve(self, kwds, imap, idir, datatype='', minimize=False): "Serve files for high level APIs (yui/css/js)" args = [] for key, val in kwds.items(): if key == 'f': # we only look-up files from given kwds dict if isinstance(val, list): args += val else: args.append(val) scripts = check_scripts(args, imap, idir) return self.serve_files(args, scripts, imap, datatype, minimize) @exposecss @tools.gzip() def css(self, **kwargs): """ Serve provided CSS files. They can be passed as f=file1.css&f=file2.css """ resource = kwargs.get('resource', 'css') if resource == 'css': return self.serve(kwargs, self.cssmap, self.cssdir, 'css', True) @exposejs @tools.gzip() def js(self, **kwargs): """ Serve provided JS scripts. They can be passed as f=file1.js&f=file2.js with optional resource parameter to speficy type of JS files, e.g. resource=yui. """ resource = kwargs.get('resource', 'js') if resource == 'js': return self.serve(kwargs, self.jsmap, self.jsdir) def serve_files(self, args, scripts, resource, datatype='', minimize=False): """ Return asked set of files for JS, YUI, CSS. """ idx = "-".join(scripts) if idx not in self._cache.keys(): data = '' if datatype == 'css': data = '@CHARSET "UTF-8";' for script in args: path = os.path.join(sys.path[0], resource[script]) path = os.path.normpath(path) ifile = open(path) data = "\n".join([data, ifile.read(). \ replace('@CHARSET "UTF-8";', '')]) ifile.close() if datatype == 'css': set_headers("text/css") if minimize: self._cache[idx] = minify(data) else: self._cache[idx] = data return self._cache[idx]
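# Illustrative sketch (not WMCore code): update_scripts() in the service above
# re-reads the data-ops scripts directory only when the cached copy is older
# than sdict_thr seconds (600 by default), storing each file under its name
# without extension and the load time under 'ts'. The helper below reproduces
# that time-based refresh on its own so the policy is easy to see; the
# directory path in the usage note is hypothetical.

import os
import time


def load_scripts(sdir, cache, max_age=600):
    """Refresh a {name: source, 'ts': timestamp} cache when it grows stale."""
    if abs(time.time() - cache.get('ts', 0)) <= max_age:
        return cache  # still fresh, keep the cached copies
    for item in os.listdir(sdir):
        full = os.path.join(sdir, item)
        if os.path.isfile(full):  # skip sub-directories, unlike the raw listdir loop
            with open(full, 'r') as istream:
                cache[item.split('.')[0]] = istream.read()
    cache['ts'] = time.time()
    return cache

# Usage (assuming a ./scripts directory exists):
#   scripts = load_scripts('./scripts', {'ts': 0})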
class ReqMgrService(TemplatedPage): """ Request Manager web service class """ def __init__(self, app, config, mount): self.base = config.base self.rootdir = '/'.join(WMCore.__file__.split('/')[:-1]) if config and not isinstance(config, dict): web_config = config.dictionary_() if not config: web_config = {'base': self.base} TemplatedPage.__init__(self, web_config) imgdir = os.environ.get('RM_IMAGESPATH', os.getcwd() + '/images') self.imgdir = web_config.get('imgdir', imgdir) cssdir = os.environ.get('RM_CSSPATH', os.getcwd() + '/css') self.cssdir = web_config.get('cssdir', cssdir) jsdir = os.environ.get('RM_JSPATH', os.getcwd() + '/js') self.jsdir = web_config.get('jsdir', jsdir) spdir = os.environ.get('RM_SPECPATH', os.getcwd() + '/specs') self.spdir = web_config.get('spdir', spdir) # read scripts area and initialize data-ops scripts self.sdir = os.environ.get('RM_SCRIPTS', os.getcwd() + '/scripts') self.sdir = web_config.get('sdir', self.sdir) self.sdict_thr = web_config.get('sdict_thr', 600) # put reasonable 10 min interval self.sdict = {'ts': time.time()} # placeholder for data-ops scripts self.update_scripts(force=True) # To be filled at run time self.cssmap = {} self.jsmap = {} self.imgmap = {} self.yuimap = {} std_specs_dir = os.path.join(self.rootdir, 'WMSpec/StdSpecs') self.std_specs = spec_list(std_specs_dir, 'WMSpec.StdSpecs') self.std_specs.sort() # Update CherryPy configuration mime_types = ['text/css'] mime_types += [ 'application/javascript', 'text/javascript', 'application/x-javascript', 'text/x-javascript' ] cherryconf.update({ 'tools.encode.on': True, 'tools.gzip.on': True, 'tools.gzip.mime_types': mime_types, }) self._cache = {} # initialize rest API statedir = '/tmp' app = RESTMain(config, statedir) # REST application mount = '/rest' # mount point for cherrypy service api = RestApiHub(app, config.reqmgr, mount) # initialize access to reqmgr2 APIs self.reqmgr = ReqMgr(config.reqmgr.reqmgr2_url) # only gets current view (This might cause to reponse time much longer, # If upto date view is not needed overwrite Fale) self.reqmgr._noStale = True # admin helpers self.admin_info = Info(app, api, config.reqmgr, mount=mount + '/info') self.admin_group = Group(app, api, config.reqmgr, mount=mount + '/group') self.admin_team = Team(app, api, config.reqmgr, mount=mount + '/team') # get fields which we'll use in templates cdict = config.reqmgr.dictionary_() self.couch_url = cdict.get('couch_host', '') self.couch_dbname = cdict.get('couch_reqmgr_db', '') self.couch_wdbname = cdict.get('couch_workload_summary_db', '') self.acdc_url = cdict.get('acdc_host', '') self.acdc_dbname = cdict.get('acdc_db', '') self.configcache_url = cdict.get('couch_config_cache_url', self.couch_url) self.dbs_url = cdict.get('dbs_url', '') self.dqm_url = cdict.get('dqm_url', '') self.sw_ver = cdict.get('default_sw_version', 'CMSSW_5_2_5') self.sw_arch = cdict.get('default_sw_scramarch', 'slc5_amd64_gcc434') # LogDB holder centralurl = cdict.get("central_logdb_url", "") identifier = cdict.get("log_reporter", "reqmgr2") self.logdb = LogDB(centralurl, identifier) def user(self): """ Return user name associated with this instance. 
""" try: return cherrypy.request.user['login'] except: return 'testuser' def user_dn(self): "Return user DN" try: return cherrypy.request.user['dn'] except: return '/CN/bla/foo' def update_scripts(self, force=False): "Update scripts dict" if force or abs(time.time() - self.sdict['ts']) > self.sdict_thr: for item in os.listdir(self.sdir): with open(os.path.join(self.sdir, item), 'r') as istream: self.sdict[item.split('.')[0]] = istream.read() self.sdict['ts'] = time.time() def abs_page(self, tmpl, content): """generate abstract page""" menu = self.templatepage('menu', menus=menus(), tmpl=tmpl) body = self.templatepage('generic', menu=menu, content=content) page = self.templatepage('main', content=body, user=self.user()) return page def page(self, content): """ Provide page wrapped with top/bottom templates. """ return self.templatepage('main', content=content) def error(self, content): "Generate common error page" content = self.templatepage('error', content=content) return self.abs_page('error', content) @expose def index(self, **kwds): """Main page""" content = self.templatepage('index', requests=ACTIVE_STATUS, rdict=REQUEST_STATE_TRANSITION) return self.abs_page('main', content) @expose def home(self, **kwds): """Main page""" return self.index(**kwds) ### Admin actions ### @expose def admin(self, **kwds): """admin page""" print "\n### ADMIN PAGE" rows = self.admin_info.get() print "rows", [r for r in rows] content = self.templatepage('admin') return self.abs_page('admin', content) @expose def add_user(self, **kwds): """add_user action""" rid = genid(kwds) status = "ok" # chagne to whatever it would be content = self.templatepage('confirm', ticket=rid, user=self.user(), status=status) return self.abs_page('admin', content) @expose def add_group(self, **kwds): """add_group action""" rows = self.admin_group.get() print "\n### GROUPS", [r for r in rows] rid = genid(kwds) status = "ok" # chagne to whatever it would be content = self.templatepage('confirm', ticket=rid, user=self.user(), status=status) return self.abs_page('admin', content) @expose def add_team(self, **kwds): """add_team action""" rows = self.admin_team.get() print "\n### TEAMS", kwds, [r for r in rows] print "request to add", kwds rid = genid(kwds) status = "ok" # chagne to whatever it would be content = self.templatepage('confirm', ticket=rid, user=self.user(), status=status) return self.abs_page('admin', content) ### Request actions ### @expose @checkargs(['status', 'sort']) def assign(self, **kwds): """assign page""" if not kwds: kwds = {} if 'status' not in kwds: kwds.update({'status': 'assignment-approved'}) docs = [] attrs = [ 'RequestName', 'RequestDate', 'Group', 'Requestor', 'RequestStatus' ] data = self.reqmgr.getRequestByStatus(statusList=[kwds['status']]) for key, val in data.items(): docs.append(request_attr(val, attrs)) sortby = kwds.get('sort', 'status') docs = [r for r in sort(docs, sortby)] misc_json = { 'CMSSW Releases': releases(), 'CMSSW architectures': architectures(), 'SubscriptionPriority': ['Low', 'Normal', 'High'], 'CustodialSubType': ['Move', 'Replica'], 'NonCustodialSubType': ['Move', 'Replica'], 'MinMergeSize': 2147483648, 'MaxMergeSize': 4294967296, 'MaxMergeEvents': 50000, 'MaxRSS': 20411724, 'MaxVSize': 20411724, 'SoftTimeout': 129600, 'GracePeriod': 300, 'BlockCloseMaxWaitTime': 66400, 'BlockCloseMaxFiles': 500, 'BlockCloseMaxEvents': 250000000, 'BlockCloseMaxSize': 5000000000000, 'AcquisitionEra': '', 'ProcessingVersion': 1, 'ProcessingString': '', 'MergedLFNBase': lfn_bases(), 
'UnmergedLFNBase': lfn_unmerged_bases(), } filter_sort = self.templatepage('filter_sort') content = self.templatepage('assign', sort=sortby, filter_sort_table=filter_sort, sites=sites(), site_white_list=site_white_list(), site_black_list=site_black_list(), user=self.user(), user_dn=self.user_dn(), requests=docs, misc_table=json2table( misc_json, web_ui_names()), misc_json=json2form(misc_json, indent=2, keep_first_value=True)) return self.abs_page('assign', content) @expose @checkargs(['status', 'sort']) def approve(self, **kwds): """ Approve page: get list of request associated with user DN. Fetch their status list from ReqMgr and display if requests were seen by data-ops. """ if not kwds: kwds = {} if 'status' not in kwds: kwds.update({'status': 'new'}) kwds.update({'_nostale': True}) docs = [] attrs = [ 'RequestName', 'RequestDate', 'Group', 'Requestor', 'RequestStatus' ] data = self.reqmgr.getRequestByStatus(statusList=[kwds['status']]) for key, val in data.items(): docs.append(request_attr(val, attrs)) sortby = kwds.get('sort', 'status') docs = [r for r in sort(docs, sortby)] filter_sort = self.templatepage('filter_sort') content = self.templatepage('approve', requests=docs, date=tstamp(), sort=sortby, filter_sort_table=filter_sort) return self.abs_page('approve', content) @expose def create(self, **kwds): """create page""" # get list of standard specs from WMCore and new ones from local area #loc_specs_dir = os.path.join(self.spdir, 'Specs') # local specs #loc_specs = spec_list(loc_specs_dir, 'Specs') #all_specs = list(set(self.std_specs + loc_specs)) #all_specs.sort() all_specs = self.std_specs spec = kwds.get('form', '') if not spec: spec = self.std_specs[0] # make spec first in all_specs list if spec in all_specs: all_specs.remove(spec) all_specs = [spec] + all_specs jsondata = get_request_template_from_type(spec) # create templatized page out of provided forms self.update_scripts() content = self.templatepage( 'create', table=json2table(jsondata, web_ui_names()), jsondata=json2form(jsondata, indent=2, keep_first_value=True), name=spec, scripts=[s for s in self.sdict.keys() if s != 'ts'], specs=all_specs) return self.abs_page('create', content) def generate_objs(self, script, jsondict): """Generate objects from givem JSON template""" self.update_scripts() code = self.sdict.get(script, '') if code.find('def genobjs(jsondict)') == -1: return self.error( "Improper python snippet, your code should start with <b>def genobjs(jsondict)</b> function" ) exec(code) # code snippet must starts with genobjs function return [r for r in genobjs(jsondict)] @expose def fetch(self, rid, **kwds): "Fetch document for given id" rid = rid.replace('request-', '') doc = self.reqmgr.getRequestByNames(rid) transitions = [] tstamp = time.time() if len(doc) == 1: try: doc = doc[rid] except: pass name = doc.get('RequestName', 'NA') title = 'Request %s' % name status = doc.get('RequestStatus', '') transitions = REQUEST_STATE_TRANSITION.get(status, []) if status in transitions: transitions.remove(status) content = self.templatepage('doc', title=title, status=status, name=name, table=json2table(doc, web_ui_names()), jsondata=json2form( doc, indent=2, keep_first_value=False), transitions=transitions, ts=tstamp, user=self.user(), userdn=self.user_dn()) elif len(doc) > 1: jsondata = [pprint.pformat(d) for d in doc] content = self.templatepage('doc', title='Series of docs: %s' % rid, table="", jsondata=jsondata, transitions=transitions, ts=tstamp, user=self.user(), userdn=self.user_dn()) else: doc = 'No request found 
for name=%s' % rid return self.abs_page('request', content) @expose def record2logdb(self, **kwds): """LogDB submission page""" print(kwds) request = kwds['request'] msg = kwds['message'] self.logdb.post(request, msg) msg = '<h6>Confirmation</h6>Your request has been entered to LogDB.' return self.abs_page('generic', msg) @expose def requests(self, **kwds): """Page showing requests""" if not kwds: kwds = {} if 'status' not in kwds: kwds.update({'status': 'acquired'}) results = self.reqmgr.getRequestByStatus(kwds['status']) docs = [] for key, doc in results.items(): docs.append(request_attr(doc)) sortby = kwds.get('sort', 'status') docs = [r for r in sort(docs, sortby)] filter_sort = self.templatepage('filter_sort') content = self.templatepage('requests', requests=docs, sort=sortby, status=kwds['status'], filter_sort_table=filter_sort) return self.abs_page('requests', content) @expose def request(self, **kwargs): "Get data example and expose it as json" dataset = kwargs.get('uinput', '') if not dataset: return {'error': 'no input dataset'} url = 'https://cmsweb.cern.ch/reqmgr/rest/outputdataset/%s' % dataset params = {} headers = {'Accept': 'application/json;text/json'} wdata = getdata(url, params) wdict = dict(date=time.ctime(), team='Team-A', status='Running', ID=genid(wdata)) winfo = self.templatepage('workflow', wdict=wdict, dataset=dataset, code=pprint.pformat(wdata)) content = self.templatepage('search', content=winfo) return self.abs_page('request', content) @expose def batch(self, **kwds): """batch page""" # TODO: we need a template for batch attributes # and read it from separate area, like DASMaps name = kwds.get('name', '') batch = {} if name: # batch = self.reqmgr.getBatchesByName(name) batch = { 'Name': 'Batch1', 'Description': 'Bla-bla', 'Creator': 'valya', 'Group': 'test', 'Workflows': ['workflow1', 'workflow2'], 'Attributes': { 'HeavyIon': ['true', 'false'] } } attributes = batch.get('Attributes', {}) workflows = batch.get('Workflows', []) description = batch.get('Description', '') creator = batch.get('Creator', self.user_dn()) content = self.templatepage('batch', name=name, attributes=json2table( attributes, web_ui_names()), workflows=workflows, creator=creator, description=description) return self.abs_page('batch', content) @expose def batches(self, **kwds): """Page showing batches""" if not kwds: kwds = {} if 'name' not in kwds: kwds.update({'name': ''}) sortby = kwds.get('sort', 'name') # results = self.reqmgr.getBatchesByName(kwds['name']) results = [ { 'Name': 'Batch1', 'Description': 'Bla-bla', 'Creator': 'valya', 'Group': 'test', 'Workflows': ['workflow1', 'workflow2'], 'Date': 'Fri Feb 13 10:36:41 EST 2015', 'Attributes': { 'HeavyIon': ['true', 'false'] } }, { 'Name': 'Batch2', 'Description': 'lksdjflksjdf', 'Creator': 'valya', 'Group': 'test', 'Workflows': ['workflow1', 'workflow2'], 'Date': 'Fri Feb 10 10:36:41 EST 2015', 'Attributes': { 'HeavyIon': ['true', 'false'] } }, ] docs = [r for r in sort(results, sortby)] filter_sort = self.templatepage('filter_sort') content = self.templatepage('batches', batches=docs, sort=sortby, filter_sort_table=filter_sort) return self.abs_page('batches', content) ### Aux methods ### @expose def put_request(self, *args, **kwds): "PUT request callback to reqmgr server, should be used in AJAX" reqname = kwds.get('RequestName', '') status = kwds.get('RequestStatus', '') if not reqname: msg = 'Unable to update request status, empty request name' raise cherrypy.HTTPError(406, msg) if not status: msg = 'Unable to update request status, 
empty status value' raise cherrypy.HTTPError(406, msg) return self.reqmgr.updateRequestStatus(reqname, status) @expose def images(self, *args, **kwargs): """ Serve static images. """ args = list(args) self.check_scripts(args, self.imgmap, self.imgdir) mime_types = [ '*/*', 'image/gif', 'image/png', 'image/jpg', 'image/jpeg' ] accepts = cherrypy.request.headers.elements('Accept') for accept in accepts: if accept.value in mime_types and len(args) == 1 \ and args[0] in self.imgmap: image = self.imgmap[args[0]] # use image extension to pass correct content type ctype = 'image/%s' % image.split('.')[-1] cherrypy.response.headers['Content-type'] = ctype return serve_file(image, content_type=ctype) def serve(self, kwds, imap, idir, datatype='', minimize=False): "Serve files for high level APIs (yui/css/js)" args = [] for key, val in kwds.items(): if key == 'f': # we only look-up files from given kwds dict if isinstance(val, list): args += val else: args.append(val) scripts = self.check_scripts(args, imap, idir) return self.serve_files(args, scripts, imap, datatype, minimize) @exposecss @tools.gzip() def css(self, **kwargs): """ Serve provided CSS files. They can be passed as f=file1.css&f=file2.css """ resource = kwargs.get('resource', 'css') if resource == 'css': return self.serve(kwargs, self.cssmap, self.cssdir, 'css', True) @exposejs @tools.gzip() def js(self, **kwargs): """ Serve provided JS scripts. They can be passed as f=file1.js&f=file2.js with optional resource parameter to speficy type of JS files, e.g. resource=yui. """ resource = kwargs.get('resource', 'js') if resource == 'js': return self.serve(kwargs, self.jsmap, self.jsdir) def serve_files(self, args, scripts, resource, datatype='', minimize=False): """ Return asked set of files for JS, YUI, CSS. """ idx = "-".join(scripts) if idx not in self._cache.keys(): data = '' if datatype == 'css': data = '@CHARSET "UTF-8";' for script in args: path = os.path.join(sys.path[0], resource[script]) path = os.path.normpath(path) ifile = open(path) data = "\n".join ([data, ifile.read().\ replace('@CHARSET "UTF-8";', '')]) ifile.close() if datatype == 'css': set_headers("text/css") if minimize: self._cache[idx] = minify(data) else: self._cache[idx] = data return self._cache[idx] def check_scripts(self, scripts, resource, path): """ Check a script is known to the resource map and that the script actually exists """ for script in scripts: if script not in resource.keys(): spath = os.path.normpath(os.path.join(path, script)) if os.path.isfile(spath): resource.update({script: spath}) return scripts
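# Illustrative sketch (not WMCore code): check_scripts() above lazily fills a
# {script: absolute_path} map for any requested file that exists under the
# static area, and serve_files() concatenates the requested files once and
# caches the result under a key built from the script list. The minimal
# stand-alone version below shows the two steps together; the paths and file
# names in the usage note are hypothetical.

import os


def resolve_scripts(scripts, resource, path):
    """Add unknown but existing files under 'path' to the resource map."""
    for script in scripts:
        if script not in resource:
            spath = os.path.normpath(os.path.join(path, script))
            if os.path.isfile(spath):
                resource[script] = spath
    return scripts


def serve_concatenated(scripts, resource, cache):
    """Return the concatenated content for 'scripts', reusing the cache."""
    key = '-'.join(scripts)
    if key not in cache:
        parts = []
        for script in scripts:
            with open(resource[script]) as istream:
                parts.append(istream.read())
        cache[key] = '\n'.join(parts)
    return cache[key]

# Usage (assuming a ./css directory containing the two files):
#   resource, cache = {}, {}
#   resolve_scripts(['a.css', 'b.css'], resource, './css')
#   payload = serve_concatenated(['a.css', 'b.css'], resource, cache)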