def getSpec(request, reqSpecs=None):
    "Get request from workload cache"
    if reqSpecs and request['RequestName'] in reqSpecs:
        return reqSpecs[request['RequestName']]
    url = str('%s/%s/spec' % (reqmgrCacheUrl(), request['RequestName']))
    mgr = RequestHandler()
    data = mgr.getdata(url, params={}, cert=cert(), ckey=ckey())
    return pickle.loads(data)
def findParent(dataset):
    "Helper function to find a parent of the dataset"
    url = '%s/datasetparents' % dbsUrl()
    params = {'dataset': dataset}
    headers = {'Accept': 'application/json'}
    mgr = RequestHandler()
    data = mgr.getdata(url, params=params, headers=headers, cert=cert(), ckey=ckey())
    return [str(i['parent_dataset']) for i in json.loads(data)]
def getWorkflows(state):
    "Get list of workflows from ReqMgr2 data-service"
    url = '%s/data/request' % reqmgrUrl()
    headers = {'Accept': 'application/json'}
    params = {'status': state}
    mgr = RequestHandler()
    res = mgr.getdata(url, params=params, headers=headers, ckey=ckey(), cert=cert())
    data = json.loads(res)
    return data.get('result', [])
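# A minimal usage sketch for getWorkflows, assuming the function above and the
# reqmgrUrl/cert/ckey helpers are importable and valid X509 credentials exist.
# The 'assigned' status value is only an illustrative choice.
if __name__ == '__main__':
    for wflowInfo in getWorkflows('assigned'):
        # each element is expected to be a dictionary keyed by request name
        for reqName in wflowInfo:
            print("Workflow in 'assigned' status: %s" % reqName)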
def getNodes(kind):
    "Get list of PhEDEx nodes"
    params = {}
    headers = {'Accept': 'application/json'}
    url = '%s/nodes' % phedexUrl()
    mgr = RequestHandler()
    data = mgr.getdata(url, params=params, headers=headers, ckey=ckey(), cert=cert())
    nodes = json.loads(data)['phedex']['node']
    return [node['name'] for node in nodes if node['kind'] == kind]
def getWorkflow(requestName):
    "Get list of workflow info from ReqMgr2 data-service for given request name"
    headers = {'Accept': 'application/json'}
    params = {}
    url = '%s/data/request/%s' % (reqmgrUrl(), requestName)
    mgr = RequestHandler()
    res = mgr.getdata(url, params=params, headers=headers, ckey=ckey(), cert=cert())
    data = json.loads(res)
    return data.get('result', [])
def getRequest(url, params):
    "Helper function to GET data from given URL"
    mgr = RequestHandler()
    headers = {'Accept': 'application/json'}
    verbose = 0
    if 'verbose' in params:
        verbose = params['verbose']
        del params['verbose']
    data = mgr.getdata(url, params, headers, ckey=ckey(), cert=cert(), verbose=verbose)
    return data
def getNodesForId(phedexid):
    "Helper function to get nodes for given phedex id"
    url = '%s/requestlist' % phedexUrl()
    params = {'request': str(phedexid)}
    headers = {'Accept': 'application/json'}
    mgr = RequestHandler()
    data = mgr.getdata(url, params, headers, ckey=ckey(), cert=cert())
    items = json.loads(data)['phedex']['request']
    nodes = [n['name'] for i in items for n in i['node']]
    return list(set(nodes))
def getNodeQueues():
    "Helper function to fetch nodes usage from PhEDEx data service"
    headers = {'Accept': 'application/json'}
    params = {}
    mgr = RequestHandler()
    url = '%s/nodeusagehistory' % phedexUrl()
    res = mgr.getdata(url, params=params, headers=headers, ckey=ckey(), cert=cert())
    data = json.loads(res)
    ret = defaultdict(int)
    for node in data['phedex']['node']:
        for usage in node['usage']:
            ret[node['name']] += int(usage['miss_bytes'] / 1024. ** 4)  # in TB
    return ret
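# A small, hypothetical usage sketch for getNodeQueues, assuming it and the
# phedexUrl/cert/ckey helpers above are in scope: print the five nodes with
# the largest accumulated miss volume.
queues = getNodeQueues()
for name, sizeTB in sorted(queues.items(), key=lambda kv: kv[1], reverse=True)[:5]:
    print("%s: %d TB" % (name, sizeTB))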
def getNodes(kind):
    "Get list of PhEDEx nodes"
    params = {}
    headers = {'Accept': 'application/json'}
    url = 'https://cmsweb.cern.ch/phedex/datasvc/json/prod/nodes'
    mgr = RequestHandler()
    data = mgr.getdata(url, params=params, headers=headers, ckey=ckey(), cert=cert())
    nodes = json.loads(data)['phedex']['node']
    return [node['name'] for node in nodes if node['kind'] == kind]
def workqueueRequests(state=None):
    "Helper function to get requests from WorkQueue"
    url = workqueueView('jobsByRequest')
    if state:
        pass  # we may need to use state when we'll query WorkQueue
    params = {}
    headers = {'Accept': 'application/json'}
    mgr = RequestHandler()
    data = mgr.getdata(url, params=params, headers=headers, cert=cert(), ckey=ckey())
    data = json.loads(data)
    rdict = {}
    for row in data.get('rows', []):
        rdict[row['key']] = row['value']
    return rdict
def setUp(self): "Setup MicroService for testing" self.app = ServiceManager() config = TestConfig manager = 'WMCore_t.Services_t.MicroService_t.MicroService_t.ServiceManager' config.views.data.manager = manager config.manager = manager mount = '/microservice' self.mgr = RequestHandler() self.port = config.main.port self.url = 'http://localhost:%s%s/data' % (self.port, mount) cherrypy.config["server.socket_port"] = self.port self.server = RestInterface(self.app, config, mount) cherrypy.tree.mount(self.server, mount) cherrypy.engine.start()
def getUrlOpener(self):
    """
    Method returning an HTTP connection handler; it is used by the constructor
    so that a subclass can override it to use a different type of connection,
    e.g. if it needs authentication or some fancy handler.
    """
    return RequestHandler(config={'timeout': 300, 'connecttimeout': 300})
class MicroServiceTest(unittest.TestCase):
    "Unit test for MicroService module"

    def setUp(self):
        "Setup MicroService for testing"
        self.app = ServiceManager()
        config = TestConfig
        manager = 'WMCore_t.Services_t.MicroService_t.MicroService_t.ServiceManager'
        config.views.data.manager = manager
        config.manager = manager
        mount = '/microservice'
        self.mgr = RequestHandler()
        self.port = config.main.port
        self.url = 'http://localhost:%s%s/data' % (self.port, mount)
        cherrypy.config["server.socket_port"] = self.port
        self.server = RestInterface(self.app, config, mount)
        cherrypy.tree.mount(self.server, mount)
        cherrypy.engine.start()

    def tearDown(self):
        "Tear down MicroService"
        cherrypy.engine.exit()
        cherrypy.engine.stop()

    def postRequest(self, params):
        "Perform POST request to our MicroService"
        headers = {'Content-type': 'application/json'}
        print("### post call %s params=%s headers=%s" % (self.url, params, headers))
        data = self.mgr.getdata(self.url, params=params, headers=headers,
                                verb='POST', cert=cert(), ckey=ckey())
        print("### post call data %s" % data)
        return json.loads(data)

    def test_getState(self):
        "Test function for getting state of the MicroService"
        url = '%s/status' % self.url
        data = self.mgr.getdata(url, params={})
        state = "bla"
        data = {"request": {"state": state}}
        self.postRequest(data)
        data = self.mgr.getdata(url, params={})
        data = json.loads(data)
        print("### url=%s, data=%s" % (url, data))
        for row in data['result']:
            if 'state' in row:
                self.assertEqual(state, row['state'])
def __init__(self, url='http://localhost', idict=None):
    """
    url should really be host - TODO fix that when have sufficient code
    coverage and change _getURLOpener if needed
    """
    if not idict:
        idict = {}
    dict.__init__(self, idict)
    self.pycurl = idict.get('pycurl', None)
    self.capath = idict.get('capath', None)
    if self.pycurl:
        self.reqmgr = RequestHandler()

    # set up defaults
    self.setdefault("accept_type", 'text/html')
    self.setdefault("content_type", 'application/x-www-form-urlencoded')
    self.additionalHeaders = {}

    # check for basic auth early, as if found this changes the url
    urlComponent = sanitizeURL(url)
    if urlComponent['username'] is not None:
        self.addBasicAuth(urlComponent['username'], urlComponent['password'])
        url = urlComponent['url']  # remove user, password from url

    self.setdefault("host", url)

    # then update with the incoming dict
    self.update(idict)

    self['endpoint_components'] = urlparse.urlparse(self['host'])

    # If cachepath = None disable caching
    if 'cachepath' in idict and idict['cachepath'] is None:
        self["req_cache_path"] = None
    else:
        cache_dir = (self.cachePath(idict.get('cachepath'), idict.get('service_name')))
        self["cachepath"] = cache_dir
        self["req_cache_path"] = os.path.join(cache_dir, '.cache')

    self.setdefault("cert", None)
    self.setdefault("key", None)
    self.setdefault('capath', None)
    self.setdefault("timeout", 300)
    self.setdefault("logger", logging)

    check_server_url(self['host'])
def _postRequest(self, url, params, verb='POST', verbose=0):
    "Helper function to POST request to given URL"
    mgr = RequestHandler(logger=self.logger)
    headers = copy(self.configDict['headers'])
    headers.update({"Authorization": self._token})
    try:
        data = mgr.getdata(url, params, headers, verb=verb, verbose=verbose)
        return json.loads(data)
    except Exception as exc:
        self.logger.error("Failed to retrieve data from MonIT. Error: %s", str(exc))
        return None
class MicroServiceTest(unittest.TestCase):
    "Unit test for MicroService module"

    def setUp(self):
        "Setup MicroService for testing"
        self.app = ServiceManager()
        config = TestConfig
        manager = 'WMCore_t.Services_t.MicroService_t.MicroService_t.ServiceManager'
        config.views.data.manager = manager
        config.manager = manager
        mount = '/microservice'
        self.mgr = RequestHandler()
        self.port = config.main.port
        self.url = 'http://localhost:%s%s/data' % (self.port, mount)
        cherrypy.config["server.socket_port"] = self.port
        self.server = RestApiHub(self.app, config, mount)
        cherrypy.tree.mount(self.server, mount)
        cherrypy.engine.start()

    def tearDown(self):
        "Tear down MicroService"
        cherrypy.engine.exit()
        cherrypy.engine.stop()

    def postRequest(self, params):
        "Perform POST request to our MicroService"
        headers = {'Content-type': 'application/json'}
        print("### post call %s params=%s headers=%s" % (self.url, params, headers))
        data = self.mgr.getdata(self.url, params=params, headers=headers,
                                verb='POST', cert=cert(), ckey=ckey())
        print("### post call data %s" % data)
        return json.loads(data)

    def test_getState(self):
        "Test function for getting state of the MicroService"
        url = '%s/status' % self.url
        data = self.mgr.getdata(url, params={})
        state = "bla"
        data = {"request": {"state": state}}
        self.postRequest(data)
        data = self.mgr.getdata(url, params={})
        data = json.loads(data)
        print("### url=%s, data=%s" % (url, data))
        for row in data['result']:
            if 'state' in row:
                self.assertEqual(state, row['state'])
def setUp(self): "initialization" self.mgr = RequestHandler() self.ckey = os.path.join(os.environ['HOME'], '.globus/userkey.pem') self.cert = os.path.join(os.environ['HOME'], '.globus/usercert.pem') self.cricheader = 'Date: Tue, 06 Nov 2018 14:50:29 GMT\r\nServer: Apache/2.4.6 (CentOS) OpenSSL/1.0.2k-fips mod_wsgi/3.4 Python/2.7.5 mod_gridsite/2.3.4\r\nVary: Cookie\r\nX-Frame-Options: SAMEORIGIN\r\nSet-Cookie: sessionid=bc1xu8zi5rbbsd5fgjuklb2tk2r3f6tw; expires=Sun, 11-Nov-2018 14:50:29 GMT; httponly; Max-Age=432000; Path=/\r\nContent-Length: 32631\r\nContent-Type: application/json\r\n\r\n' self.dbsheader = 'Date: Tue, 06 Nov 2018 14:39:07 GMT\r\nServer: Apache\r\nCMS-Server-Time: D=1503 t=1541515147806112\r\nTransfer-Encoding: chunked\r\nContent-Type: text/html\r\n\r\n' self.HTTPheader = 'Date: Tue, 06 Nov 2018 14:50:29 GMT\r\nServer: Apache/2.4.6 (CentOS) OpenSSL/1.0.2k-fips mod_wsgi/3.4 Python/2.7.5 mod_gridsite/2.3.4\r\nVary: Cookie\r\nX-Frame-Options: SAMEORIGIN\r\nSet-Cookie: GRIDHTTP_PASSCODE=2c6da9c96efa2ad0farhda; domain=cms-cric.cern.ch; path=/; secure\r\nContent-Length: 32631\r\nContent-Type: application/json\r\n\r\n'
def alterSubscription(phedexid, decision, comments, nodes=None):
    "Helper function to alter subscriptions for given phedex id and nodes"
    mgr = RequestHandler()
    headers = {'Accept': 'application/json'}
    nodes = nodes if nodes else getNodesForId(phedexid)
    params = {
        'decision': decision,
        'request': phedexid,
        'node': ','.join(nodes),
        'comments': comments
    }
    url = '%s/updaterequest' % phedexUrl()
    data = mgr.getdata(url, params, headers, ckey=ckey(), cert=cert(), verb='POST')
    result = json.loads(data)
    if not result:
        return False
    if 'already' in result:
        return True
    return result
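# A hypothetical call sequence for the subscription helpers above, assuming
# getNodesForId and alterSubscription are in scope; the request id and the
# 'approve' decision value are illustrative placeholders.
phedexid = 123456  # placeholder PhEDEx request id
nodes = getNodesForId(phedexid)
status = alterSubscription(phedexid, 'approve', 'approved by data-ops', nodes=nodes)
print("Subscription %s decision result: %s" % (phedexid, status))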
def renewRucioToken(rucioAuthUrl, userToken):
    """
    Provided a user Rucio token, check its lifetime and extend it by another hour
    :param rucioAuthUrl: url to the rucio authentication server
    :param userToken: the user Rucio token to be renewed
    :return: a datetime.datetime object with the new token lifetime
    """
    params = {}
    headers = {"X-Rucio-Auth-Token": userToken}
    url = '%s/auth/validate' % rucioAuthUrl
    logging.info("Renewing the Rucio token...")
    mgr = RequestHandler()
    res = mgr.getdata(url, params=params, headers=headers, ckey=ckey(), cert=cert())
    try:
        newExpiration = eval(res)['lifetime']
    except Exception as exc:
        raise RuntimeError("Failed to renew Rucio token. Response: {} Error: {}".format(res, str(exc)))
    return newExpiration
def getDataFromURL(url, proxyfilename=None):
    """
    Read the content of a URL and return it as a string.
    Type of content should not matter, it can be a json file or a tarball for example.

    url: the link you would like to retrieve
    proxyfilename: the x509 proxy certificate to be used in case auth is required

    Returns binary data encoded as a string, which can be later processed
    according to what kind of content it represents.
    """
    # Get rid of unicode which may cause problems in pycurl
    stringUrl = url.encode('ascii')
    reqHandler = RequestHandler()
    _, data = reqHandler.request(url=stringUrl, params={}, ckey=proxyfilename, cert=proxyfilename,
                                 capath=HTTPRequests.getCACertPath())
    return data
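# A minimal, hypothetical usage sketch for getDataFromURL; the URL and proxy
# path below are placeholders and the helper above is assumed to be in scope.
import json

proxy = '/tmp/x509up_u12345'  # placeholder proxy path
raw = getDataFromURL('https://example.cern.ch/some/document.json', proxyfilename=proxy)
doc = json.loads(raw)  # decode only if the content is actually JSON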
def agentsSites(url):
    "Return list of sites known in CMS WMAgents"
    sites_ready_in_agent = set()
    headers = {'Accept': 'application/json'}
    params = {}
    mgr = RequestHandler()
    res = mgr.getdata(url, params=params, headers=headers, ckey=ckey(), cert=cert())
    data = json.loads(res)
    agents = {}
    for r in [i['value'] for i in data['rows']]:
        team = r['agent_team']
        if team != 'production':
            continue
        agents.setdefault(team, []).append(r)
    for team, agents in agents.items():
        for agent in agents:
            if agent['status'] != 'ok':
                continue
            for site, sinfo in agent['WMBS_INFO']['thresholds'].iteritems():
                if sinfo['state'] in ['Normal']:
                    sites_ready_in_agent.add(site)
    return sites_ready_in_agent
def setUp(self): "Setup MicroService for testing" self.app = ServiceManager() config = TestConfig manager = 'WMCore_t.Services_t.MicroService_t.MicroService_t.ServiceManager' config.views.data.manager = manager config.manager = manager mount = '/microservice' self.mgr = RequestHandler() self.port = config.main.port self.url = 'http://localhost:%s%s/data' % (self.port, mount) cherrypy.config["server.socket_port"] = self.port self.server = RestApiHub(self.app, config, mount) cherrypy.tree.mount(self.server, mount) cherrypy.engine.start()
def getRucioToken(rucioAuthUrl, rucioAcct):
    """
    Provided a Rucio account, fetch a token from the authentication server
    :param rucioAuthUrl: url to the rucio authentication server
    :param rucioAcct: rucio account to be used
    :return: a tuple with the token string and its expiration time in EPOCH seconds
    """
    params = {}
    headers = {"X-Rucio-Account": rucioAcct}
    url = '%s/auth/x509' % rucioAuthUrl
    logging.info("Requesting a token to Rucio for account: %s, against url: %s", rucioAcct, rucioAuthUrl)
    mgr = RequestHandler()
    res = mgr.getheader(url, params=params, headers=headers, ckey=ckey(), cert=cert())
    if res.getReason() == "OK":
        userToken = res.getHeaderKey('X-Rucio-Auth-Token')
        tokenExpiration = res.getHeaderKey('X-Rucio-Auth-Token-Expires')
        logging.info("Retrieved Rucio token valid until: %s", tokenExpiration)
        # convert the human readable expiration time to EPOCH time
        tokenExpiration = datetime.datetime.strptime(tokenExpiration, "%a, %d %b %Y %H:%M:%S %Z")
        tokenExpiration = int(tokenExpiration.strftime('%s'))
        return userToken, tokenExpiration
    raise RuntimeError("Failed to acquire a Rucio token. Error: {}".format(res.getReason()))
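# A hedged end-to-end sketch combining getRucioToken and renewRucioToken,
# assuming both functions are in scope; the auth server URL and account name
# are placeholders, and the 30-minute threshold is an arbitrary choice.
import time

rucioAuthUrl = 'https://rucio-auth.example.cern.ch'  # placeholder auth server
token, expiration = getRucioToken(rucioAuthUrl, 'wmcore_transferor')
if expiration - int(time.time()) < 1800:  # less than 30 minutes of lifetime left
    newLifetime = renewRucioToken(rucioAuthUrl, token)
    print("Token renewed, new lifetime: %s" % newLifetime)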
def __init__(self, url='http://localhost', idict=None):
    """
    url should really be host - TODO fix that when have sufficient code
    coverage and change _getURLOpener if needed
    """
    if not idict:
        idict = {}
    dict.__init__(self, idict)
    self.pycurl = idict.get('pycurl', None)
    self.capath = idict.get('capath', None)
    if self.pycurl:
        self.reqmgr = RequestHandler()

    # set up defaults
    self.setdefault("accept_type", 'text/html')
    self.setdefault("content_type", 'application/x-www-form-urlencoded')
    self.additionalHeaders = {}

    # check for basic auth early, as if found this changes the url
    urlComponent = sanitizeURL(url)
    if urlComponent['username'] is not None:
        self.addBasicAuth(urlComponent['username'], urlComponent['password'])
        url = urlComponent['url']  # remove user, password from url

    self.setdefault("host", url)

    # then update with the incoming dict
    self.update(idict)

    self['endpoint_components'] = urlparse.urlparse(self['host'])

    # If cachepath = None disable caching
    if 'cachepath' in idict and idict['cachepath'] is None:
        self["req_cache_path"] = None
    else:
        cache_dir = (self.cachePath(idict.get('cachepath'), idict.get('service_name')))
        self["cachepath"] = cache_dir
        self["req_cache_path"] = os.path.join(cache_dir, '.cache')

    self.setdefault("timeout", 300)
    self.setdefault("logger", logging)

    check_server_url(self['host'])

    # and then get the URL opener
    self.setdefault("conn", self._getURLOpener())
class AuxCacheUpdateTasks(CherryPyPeriodicTask):
    """
    Periodically updates the auxiliary couch documents (i.e. TagCollector data).
    """

    def __init__(self, rest, config):
        super(AuxCacheUpdateTasks, self).__init__(config)
        self.reqmgrAux = ReqMgrAux(config.reqmgr2_url, logger=self.logger)
        self.mgr = RequestHandler()

    def setConcurrentTasks(self, config):
        """
        Sets the list of functions which will be run concurrently.
        """
        self.concurrentTasks = [{'func': self.updateAuxiliarDocs, 'duration': config.tagCollectDuration}]

    def updateAuxiliarDocs(self, config):
        """
        Update the central couch database with auxiliary documents
        that need to be constantly updated whenever an update is
        made at the data source
        """
        self.logger.info("Updating auxiliary couch documents ...")
        self.reqmgrAux.populateCMSSWVersion(config.tagcollect_url, **config.tagcollect_args)
        try:
            data = self.mgr.getdata(config.unified_url, params={},
                                    headers={'Accept': 'application/json'})
            data = json.loads(data)
        except Exception as ex:
            msg = "Failed to retrieve unified configuration from github. Error: %s" % str(ex)
            msg += "\nRetrying again in the next cycle"
            self.logger.error(msg)
            return
        self.reqmgrAux.updateUnifiedConfig(data, docName="config")
class PyCurlManager(unittest.TestCase):
    """Test pycurl_manager module"""

    def setUp(self):
        "initialization"
        self.mgr = RequestHandler()
        self.ckey = os.path.join(os.environ['HOME'], '.globus/userkey.pem')
        self.cert = os.path.join(os.environ['HOME'], '.globus/usercert.pem')

    def testMulti(self):
        """
        Test fetch of several urls at once, one of the urls relies on CERN SSO.
        """
        tfile = tempfile.NamedTemporaryFile()
        url1 = "https://cmsweb.cern.ch/dbs/prod/global/DBSReader/help"
        url2 = "https://cmsweb.cern.ch/dbs/prod/global/DBSReader/datatiers"
        url3 = "https://cms-gwmsmon.cern.ch/prodview/json/site_summary"
        cern_sso_cookie(url3, tfile.name, self.cert, self.ckey)
        cookie = {url3: tfile.name}
        urls = [url1, url2, url3]
        data = getdata(urls, self.ckey, self.cert, cookie=cookie)
        headers = 0
        for row in data:
            if '200 OK' in row['headers']:
                headers += 1
        self.assertEqual(headers, 3)

    def testSingle(self):
        """
        Test single call to CERN SSO url.
        """
        # test RequestHandler
        url = "https://cms-gwmsmon.cern.ch/prodview/json/site_summary"
        params = {}
        tfile = tempfile.NamedTemporaryFile()
        cern_sso_cookie(url, tfile.name, self.cert, self.ckey)
        cookie = {url: tfile.name}
        header, _ = self.mgr.request(url, params, cookie=cookie)
        self.assertEqual(header.status, 200)
class MicroServiceTest(unittest.TestCase):
    "Unit test for MicroService module"

    def setUp(self):
        "Setup MicroService for testing"
        self.managerName = "ServiceManager"
        config = TestConfig
        manager = 'WMCore_t.MicroService_t.MicroService_t.%s' % self.managerName
        config.views.data.manager = manager
        config.manager = manager
        mount = '/microservice/data'
        self.mgr = RequestHandler()
        self.port = config.main.port
        self.url = 'http://localhost:%s%s' % (self.port, mount)
        cherrypy.config["server.socket_port"] = self.port
        self.app = ServiceManager(config)
        self.server = RestApiHub(self.app, config, mount)
        cherrypy.tree.mount(self.server, mount)
        cherrypy.engine.start()

    def tearDown(self):
        "Tear down MicroService"
        cherrypy.engine.stop()
        cherrypy.engine.exit()

    def postRequest(self, apiName, params):
        "Perform POST request to our MicroService"
        headers = {'Content-type': 'application/json'}
        url = self.url + "/%s" % apiName
        data = self.mgr.getdata(url, params=params, headers=headers,
                                verb='POST', cert=cert(), ckey=ckey(), encode=True, decode=True)
        print("### post call data %s" % data)
        return data

    def testGetStatus(self):
        "Test function for getting state of the MicroService"
        api = "status"
        url = '%s/%s' % (self.url, api)
        params = {}
        data = self.mgr.getdata(url, params=params, encode=True, decode=True)
        self.assertEqual(data['result'][0]['microservice'], self.managerName)
        self.assertEqual(data['result'][0]['api'], api)
        params = {"service": "transferor"}
        data = self.mgr.getdata(url, params=params, encode=True, decode=True)
        self.assertEqual(data['result'][0]['microservice'], self.managerName)
        self.assertEqual(data['result'][0]['api'], api)

    def testGetInfo(self):
        "Test function for getting state of the MicroService"
        api = "status"
        url = '%s/%s' % (self.url, api)
        params = {}
        data = self.mgr.getdata(url, params=params, encode=True, decode=True)
        self.assertEqual(data['result'][0]['microservice'], self.managerName)
        self.assertEqual(data['result'][0]['api'], api)
        params = {"request": "fake_request_name"}
        data = self.mgr.getdata(url, params=params, encode=True, decode=True)
        self.assertEqual(data['result'][0]['microservice'], self.managerName)
        self.assertEqual(data['result'][0]['api'], api)

    def testPostCall(self):
        "Test function for getting state of the MicroService"
        api = "status"
        data = {"request": "fake_request_name"}
        data = self.postRequest(api, data)
        self.assertDictEqual(data['result'][0], {'status': 'OK', 'api': 'info'})
class MSRuleCleaner(MSCore): """ MSRuleCleaner.py class provides the logic used to clean the Rucio block level data placement rules created by WMAgent. """ def __init__(self, msConfig, logger=None): """ Runs the basic setup and initialization for the MSRuleCleaner module :param msConfig: micro service configuration """ super(MSRuleCleaner, self).__init__(msConfig, logger=logger) self.msConfig.setdefault("verbose", True) self.msConfig.setdefault("interval", 60) self.msConfig.setdefault("services", ['ruleCleaner']) self.msConfig.setdefault("rucioWmaAccount", "wma_test") self.msConfig.setdefault("rucioMStrAccount", "wmcore_transferor") self.msConfig.setdefault('enableRealMode', False) self.mode = "RealMode" if self.msConfig[ 'enableRealMode'] else "DryRunMode" self.emailAlert = EmailAlert(self.msConfig) self.curlMgr = RequestHandler() # Building all the Pipelines: pName = 'plineMSTrCont' self.plineMSTrCont = Pipeline(name=pName, funcLine=[ Functor(self.setPlineMarker, pName), Functor(self.cleanRucioRules) ]) pName = 'plineMSTrBlock' self.plineMSTrBlock = Pipeline(name=pName, funcLine=[ Functor(self.setPlineMarker, pName), Functor(self.cleanRucioRules) ]) pName = 'plineAgentCont' self.plineAgentCont = Pipeline( name=pName, funcLine=[ Functor(self.setPlineMarker, pName), Functor(self.getRucioRules, 'container', self.msConfig['rucioWmaAccount']), Functor(self.cleanRucioRules) ]) pName = 'plineAgentBlock' self.plineAgentBlock = Pipeline( name=pName, funcLine=[ Functor(self.setPlineMarker, pName), Functor(self.getRucioRules, 'block', self.msConfig['rucioWmaAccount']), Functor(self.cleanRucioRules) ]) pName = 'plineArchive' self.plineArchive = Pipeline(name=pName, funcLine=[ Functor(self.setPlineMarker, pName), Functor(self.setClean), Functor(self.archive) ]) # Building the different set of plines we will need later: # NOTE: The following are all the functional pipelines which are supposed to include # a cleanup function and report cleanup status in the MSRuleCleanerWflow object self.cleanuplines = [ self.plineMSTrCont, self.plineMSTrBlock, self.plineAgentCont, self.plineAgentBlock ] # Building an auxiliary list of cleanup pipeline names only: self.cleanupPipeNames = [pline.name for pline in self.cleanuplines] # Building lists of pipelines related only to Agents or MStransferror self.agentlines = [self.plineAgentCont, self.plineAgentBlock] self.mstrlines = [self.plineMSTrCont, self.plineMSTrBlock] # Initialization of the 'cleaned' and 'archived' counters: self.wfCounters = {'cleaned': {}, 'archived': 0} def resetCounters(self): """ A simple function for zeroing the cleaned and archived counters. """ for pline in self.cleanuplines: self.wfCounters['cleaned'][pline.name] = 0 self.wfCounters['archived'] = 0 def execute(self, reqStatus): """ Executes the whole ruleCleaner logic :return: summary """ # start threads in MSManager which should call this method summary = dict(RULECLEANER_REPORT) self.currThread = current_thread() self.currThreadIdent = self.currThread.name self.updateReportDict(summary, "thread_id", self.currThreadIdent) self.resetCounters() self.logger.info("MSRuleCleaner is running in mode: %s.", self.mode) # Build the list of workflows to work on: try: requestRecords = {} for status in reqStatus: requestRecords.update(self.getRequestRecords(status)) except Exception as err: # general error msg = "Unknown exception while fetching requests from ReqMgr2. 
Error: %s", str( err) self.logger.exception(msg) self.updateReportDict(summary, "error", msg) # Call _execute() and feed the relevant pipeline with the objects popped from requestRecords try: totalNumRequests, cleanNumRequests, archivedNumRequests = self._execute( requestRecords) msg = "\nNumber of processed workflows: %s." msg += "\nNumber of properly cleaned workflows: %s." msg += "\nNumber of archived workflows: %s." self.logger.info(msg, totalNumRequests, cleanNumRequests, archivedNumRequests) self.updateReportDict(summary, "total_num_requests", totalNumRequests) self.updateReportDict(summary, "clean_num_requests", cleanNumRequests) self.updateReportDict(summary, "archived_num_requests", archivedNumRequests) except Exception as ex: msg = "Unknown exception while running MSRuleCleaner thread Error: %s" self.logger.exception(msg, str(ex)) self.updateReportDict(summary, "error", msg) return summary def _execute(self, reqRecords): """ Executes the MSRuleCleaner pipelines based on the workflow status :param reqList: A list of RequestRecords to work on :return: a tuple with: number of properly cleaned requests number of processed workflows number of archived workflows """ # NOTE: The Input Cleanup, the Block Level Cleanup and the Archival # Pipelines are executed sequentially in the above order. # This way we assure ourselves that we archive only workflows # that have accomplished the needed cleanup cleanNumRequests = 0 totalNumRequests = 0 # Call the workflow dispatcher: for _, req in reqRecords.items(): wflow = MSRuleCleanerWflow(req) self._dispatchWflow(wflow) msg = "\n----------------------------------------------------------" msg += "\nMSRuleCleanerWflow: %s" msg += "\n----------------------------------------------------------" self.logger.debug(msg, pformat(wflow)) totalNumRequests += 1 if self._checkClean(wflow): cleanNumRequests += 1 # Report the counters: for pline in self.cleanuplines: msg = "Workflows cleaned by pipeline: %s: %d" self.logger.info(msg, pline.name, self.wfCounters['cleaned'][pline.name]) archivedNumRequests = self.wfCounters['archived'] self.logger.info("Workflows archived: %d", self.wfCounters['archived']) return totalNumRequests, cleanNumRequests, archivedNumRequests def _dispatchWflow(self, wflow): """ A function intended to dispatch a workflow (e.g based on its status) through one or more functional pipelines in case there is some more complicated logic involved in the order we execute them but not just a sequentially """ self.logger.debug("Dispatching workflow: %s", wflow['RequestName']) # NOTE: The following dispatch logic is a subject to be changed at any time # Resolve: # NOTE: First resolve any preliminary flags that will be needed further # in the logic of the _dispatcher() itself if wflow['RequestStatus'] == 'announced': self.getMSOutputTransferInfo(wflow) # Clean: # Do not clean any Resubmission, but still let them be archived if wflow['RequestType'] == 'Resubmission': wflow['ForceArchive'] = True msg = "Skipping cleanup step for workflow: %s - RequestType is %s." msg += " Will try to archive it directly." self.logger.info(msg, wflow['RequestName'], wflow['RequestType']) elif wflow['RequestStatus'] in ['rejected', 'aborted-completed']: # NOTE: We do not check the ParentageResolved flag for these # workflows, but we do need to clean output data placement # rules from the agents for them for pline in self.agentlines: try: pline.run(wflow) except Exception as ex: msg = "%s: General error from pipeline. Workflow: %s. Error: \n%s. 
" msg += "\nWill retry again in the next cycle." self.logger.exception(msg, pline.name, wflow['RequestName'], str(ex)) continue if wflow['CleanupStatus'][pline.name]: self.wfCounters['cleaned'][pline.name] += 1 elif wflow['RequestStatus'] == 'announced' and not wflow[ 'ParentageResolved']: # NOTE: We skip workflows which are not having 'ParentageResolved' # flag, but we still need some proper logging for them. msg = "Skipping workflow: %s - 'ParentageResolved' flag set to false." msg += " Will retry again in the next cycle." self.logger.info(msg, wflow['RequestName']) elif wflow[ 'RequestStatus'] == 'announced' and not wflow['TransferDone']: # NOTE: We skip workflows which have not yet finalised their TransferStatus # in MSOutput, but we still need some proper logging for them. msg = "Skipping workflow: %s - 'TransferStatus' is 'pending' or 'TransferInfo' is missing in MSOutput." msg += " Will retry again in the next cycle." self.logger.info(msg, wflow['RequestName']) elif wflow['RequestStatus'] == 'announced': for pline in self.cleanuplines: try: pline.run(wflow) except Exception as ex: msg = "%s: General error from pipeline. Workflow: %s. Error: \n%s. " msg += "\nWill retry again in the next cycle." self.logger.exception(msg, pline.name, wflow['RequestName'], str(ex)) continue if wflow['CleanupStatus'][pline.name]: self.wfCounters['cleaned'][pline.name] += 1 else: # We shouldn't be here: msg = "Skipping workflow: %s - " msg += "Does not fall under any of the defined categories." self.logger.error(msg, wflow['RequestName']) # Archive: try: self.plineArchive.run(wflow) self.wfCounters['archived'] += 1 except MSRuleCleanerArchival as ex: msg = "%s: Archival Error: %s. " msg += " Will retry again in the next cycle." self.logger.error(msg, wflow['PlineMarkers'][-1], ex.message()) except Exception as ex: msg = "%s General error from pipeline. Workflow: %s. Error: \n%s. " msg += "\nWill retry again in the next cycle." self.logger.exception(msg, wflow['PlineMarkers'][-1], wflow['RequestName'], str(ex)) def setPlineMarker(self, wflow, pName): """ A function intended to mark which is the pipeline currently working on the workflow. It is supposed to be called always as a first function in the pipeline. :param wflow: A MSRuleCleaner workflow representation :param pName: The name of the functional pipeline :return wflow: """ # NOTE: The current functional pipeline MUST always be appended at the # end of the 'PlineMarkers' list # First get rid of the default: if not wflow['PlineMarkers']: wflow['PlineMarkers'] = [] # Then push our current value into the markers list: wflow['PlineMarkers'].append(pName) # Populate the list of flags to be used later: if pName not in wflow['RulesToClean']: if pName in self.cleanupPipeNames: wflow['RulesToClean'][pName] = [] if pName not in wflow['CleanupStatus']: if pName in self.cleanupPipeNames: wflow['CleanupStatus'][pName] = False return wflow def _checkClean(self, wflow): """ An auxiliary function used to only check the temporary cleanup status. It basically takes the pipelines registered in 'PlineMarkers' that have already worked on the workflow as a mask and applies this mask over the set of flags in the 'CleanupStatus' field and then reduces the result to a single bool value """ # NOTE: This is one of the few functions taking a workflow as an argument # but returning a bool, since it is an auxiliary function and is not # supposed to be called as a standalone function in a pipeline. 
# NOTE: `all([]) == True`, ergo all the 'rejected' && 'aborted-completed' workflows # are also counted as properly cleaned and can trigger archival later # Build a list of bool flags based on the mask of PlineMarkers cleanFlagsList = [ wflow['CleanupStatus'][key] for key in wflow['PlineMarkers'] if key in wflow['CleanupStatus'].keys() ] # If no one have worked on the workflow set the clean status to false if not wflow['PlineMarkers']: cleanStatus = False # If we have a mask longer than the list of flags avoid false positives # because of the behavior explained above - `all([]) == True` elif not cleanFlagsList: cleanStatus = False # Figure out the final value else: cleanStatus = all(cleanFlagsList) return cleanStatus def setClean(self, wflow): """ A function to set the 'IsClean' flag based on the status from all the pipelines which have worked on the workflow (and have put their markers in the 'PlineMarkers' list) :param wflow: A MSRuleCleaner workflow representation :return wflow: """ wflow['IsClean'] = self._checkClean(wflow) return wflow def archive(self, wflow): """ Move the workflow to the proper archived status after checking the full cleanup status :param wflow: A MSRuleCleaner workflow representation :param archStatus: Target status to transition after archival :return wflow: """ # NOTE: check allowed status transitions with: # https://github.com/dmwm/WMCore/blob/5961d2229b1e548e58259c06af154f33bce36c68/src/python/WMCore/ReqMgr/DataStructs/RequestStatus.py#L171 if not (wflow['IsClean'] or wflow['ForceArchive']): msg = "Not properly cleaned workflow: %s" % wflow['RequestName'] raise MSRuleCleanerArchival(msg) # Check the available status transitions before we decide the final status targetStatusList = RequestStatus.REQUEST_STATE_TRANSITION.get( wflow['RequestStatus'], []) self.logger.info("targetStatusList: %s", targetStatusList) return wflow def getMSOutputTransferInfo(self, wflow): """ Fetches the transfer information from the MSOutput REST interface for the given workflow. :param wflow: A MSRuleCleaner workflow representation :return wflow: """ headers = {'Accept': 'application/json'} params = {} url = '%s/data/info?request=%s' % (self.msConfig['msOutputUrl'], wflow['RequestName']) try: res = self.curlMgr.getdata(url, params=params, headers=headers, ckey=ckey(), cert=cert()) data = json.loads(res)['result'][0] transferInfo = data['transferDoc'] except Exception as ex: msg = "General exception while fetching TransferInfo from MSOutput for %s. " msg += "Error: %s" self.logger.exception(msg, wflow['RequestName'], str(ex)) if transferInfo is not None and transferInfo[ 'TransferStatus'] == 'done': wflow['TransferDone'] = True return wflow def getRucioRules(self, wflow, gran, rucioAcct): """ Queries Rucio and builds the relevant list of blocklevel rules for the given workflow :param wflow: A MSRuleCleaner workflow representation :param gran: Data granularity to search for Rucio rules. 
Possible values: 'block' || 'container' :return: wflow """ currPline = wflow['PlineMarkers'][-1] # Find all the output placement rules created by the agents for dataCont in wflow['OutputDatasets']: if gran == 'container': for rule in self.rucio.listDataRules(dataCont, account=rucioAcct): wflow['RulesToClean'][currPline].append(rule['id']) elif gran == 'block': try: blocks = self.rucio.getBlocksInContainer(dataCont) for block in blocks: for rule in self.rucio.listDataRules( block, account=rucioAcct): wflow['RulesToClean'][currPline].append(rule['id']) except WMRucioDIDNotFoundException: msg = "Container: %s not found in Rucio for workflow: %s." self.logger.info(msg, dataCont, wflow['RequestName']) return wflow def cleanRucioRules(self, wflow): """ Cleans all the Rules present in the field 'RulesToClean' in the MSRuleCleaner workflow representation. And fills the relevant Cleanup Status. :param wflow: A MSRuleCleaner workflow representation :return: wflow """ # NOTE: The function should be called independently and sequentially from # The Input and the respective BlockLevel pipelines. # NOTE: The current functional pipeline is always the last one in the PlineMarkers list currPline = wflow['PlineMarkers'][-1] delResults = [] if self.msConfig['enableRealMode']: for rule in wflow['RulesToClean'][currPline]: self.logger.info("%s: Deleting ruleId: %s ", currPline, rule) delResult = self.rucio.deleteRule(rule) delResults.append(delResult) if not delResult: self.logger.warning("%s: Failed to delete ruleId: %s ", currPline, rule) else: for rule in wflow['RulesToClean'][currPline]: delResults.append(True) self.logger.info("%s: DRY-RUN: Is about to delete ruleId: %s ", currPline, rule) # Set the cleanup flag: wflow['CleanupStatus'][currPline] = all(delResults) # ---------------------------------------------------------------------- # FIXME : To be removed once the plineMSTrBlock && plineMSTrCont are # developed if wflow['CleanupStatus'][currPline] in [ 'plineMSTrBlock', 'plineMSTrCont' ]: wflow['CleanupStatus'][currPline] = True # ---------------------------------------------------------------------- return wflow def getRequestRecords(self, reqStatus): """ Queries ReqMgr2 for requests in a given status. :param reqStatus: The status for the requests to be fetched from ReqMgr2 :return requests: A dictionary with all the workflows in the given status """ self.logger.info("Fetching requests in status: %s", reqStatus) result = self.reqmgr2.getRequestByStatus([reqStatus], detail=True) if not result: requests = {} else: requests = result[0] self.logger.info(' retrieved %s requests in status: %s', len(requests), reqStatus) return requests
def __init__(self, config, quiet):
    """
    Initialise class members
    """
    self.config = config.General
    self.max_files_per_block = self.config.max_files_per_block
    # self.userCert = self.config.opsCert
    # self.userKey = self.config.opsKey
    self.block_publication_timeout = self.config.block_closure_timeout
    self.lfn_map = {}
    self.force_publication = False
    self.force_failure = False

    # TODO: logger!
    def createLogdir(dirname):
        """
        Create the directory dirname, ignoring errors in case it exists.
        Exit if the directory cannot be created.
        """
        try:
            os.mkdir(dirname)
        except OSError as ose:
            if ose.errno != 17:  # ignore the "Directory already exists" error
                print(str(ose))
                print("The task worker needs to access the '%s' directory" % dirname)
                sys.exit(1)

    def setRootLogger(quiet, debug):
        """Sets the root logger with the desired verbosity level
           The root logger logs to logs/twlog.txt and every single
           logging instruction is propagated to it (not really nice to read)

        :arg bool quiet: it tells if a quiet logger is needed
        :arg bool debug: it tells if needs a verbose logger
        :return logger: a logger with the appropriate logger level."""
        createLogdir('logs')
        createLogdir('logs/processes')
        createLogdir('logs/tasks')

        logHandler = MultiProcessingLog('logs/log.txt', when='midnight')
        logFormatter = logging.Formatter("%(asctime)s:%(levelname)s:%(module)s,%(lineno)d:%(message)s")
        logHandler.setFormatter(logFormatter)
        logging.getLogger().addHandler(logHandler)
        loglevel = logging.INFO
        if quiet:
            loglevel = logging.WARNING
        if debug:
            loglevel = logging.DEBUG
        logging.getLogger().setLevel(loglevel)
        logger = setProcessLogger("master")
        logger.debug("PID %s.", os.getpid())
        logger.debug("Logging level initialized to %s.", loglevel)
        return logger

    self.cache_area = self.config.cache_area
    self.logger = setRootLogger(quiet, True)

    try:
        self.oracleDB = HTTPRequests(self.config.oracleDB,
                                     self.config.opsCert,
                                     self.config.opsKey)
        self.logger.debug('Contacting OracleDB:' + self.config.oracleDB)
    except Exception:
        self.logger.exception('Failed when contacting Oracle')
        raise

    try:
        self.connection = RequestHandler(config={'timeout': 900, 'connecttimeout': 900})
    except Exception as ex:
        msg = "Error initializing the connection"
        msg += str(ex)
        msg += str(traceback.format_exc())
        self.logger.debug(msg)
class AlertManagerAPI(object):
    """
    A class used to send alerts via the MONIT AlertManager API
    """

    def __init__(self, alertManagerUrl, logger=None):
        self.alertManagerUrl = alertManagerUrl
        # sender's hostname is added as an annotation
        self.hostname = socket.gethostname()
        self.mgr = RequestHandler()
        self.ltz = LocalTimezone()
        self.headers = {"Content-Type": "application/json"}
        self.validSeverity = ["high", "medium", "low"]
        self.logger = logger if logger else logging.getLogger()

    def sendAlert(self, alertName, severity, summary, description, service,
                  tag="wmcore", endSecs=600, generatorURL=""):
        """
        :param alertName: a unique name for the alert
        :param severity: low, medium, high
        :param summary: a short description of the alert
        :param description: a longer informational message with details about the alert
        :param service: the name of the service firing an alert
        :param tag: a unique tag used to help route the alert
        :param endSecs: how many seconds until the alarm is silenced
        :param generatorURL: this URL will be sent to AlertManager and configured as a clickable
                             "Source" link in the web interface

        AlertManager JSON format reference: https://www.prometheus.io/docs/alerting/latest/clients/
        [
          {
            "labels": {
              "alertname": "<requiredAlertName>",
              "<labelname>": "<labelvalue>",
              ...
            },
            "annotations": {
              "<labelname>": "<labelvalue>",
              ...
            },
            "startsAt": "<rfc3339>",  # optional, will be current time if not present
            "endsAt": "<rfc3339>",
            "generatorURL": "<generator_url>"  # optional
          },
        ]
        """
        if not self._isValidSeverity(severity):
            return False

        request = []
        alert = {}
        labels = {}
        annotations = {}

        # add labels
        labels["alertname"] = alertName
        labels["severity"] = severity
        labels["tag"] = tag
        labels["service"] = service
        alert["labels"] = labels

        # add annotations
        annotations["hostname"] = self.hostname
        annotations["summary"] = summary
        annotations["description"] = description
        alert["annotations"] = annotations

        # In python3 we won't need the LocalTimezone class
        # Will change to d = datetime.now().astimezone() + timedelta(seconds=endSecs)
        d = datetime.now(self.ltz) + timedelta(seconds=endSecs)
        alert["endsAt"] = d.isoformat("T")
        alert["generatorURL"] = generatorURL

        request.append(alert)
        # need to do this because pycurl_manager only accepts dict and encoded strings type
        params = json.dumps(request)

        res = self.mgr.getdata(self.alertManagerUrl, params=params, headers=self.headers, verb='POST')
        return res

    def _isValidSeverity(self, severity):
        """
        Used to check if the severity of the alert matches the valid levels:
        low, medium, high
        :param severity: severity of the alert
        :return: True or False
        """
        if severity not in self.validSeverity:
            logging.critical("Alert submitted to AlertManagerAPI with invalid severity: %s", severity)
            return False
        return True
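# A brief, hypothetical usage sketch for AlertManagerAPI; the AlertManager
# endpoint, alert name and service values below are placeholders.
amUrl = 'http://localhost:9093/api/v1/alerts'  # placeholder AlertManager URL
alertApi = AlertManagerAPI(amUrl)
alertApi.sendAlert(alertName="ms_transferor_stuck",
                   severity="medium",
                   summary="Transferor cycle did not complete",
                   description="The last transferor cycle exceeded its expected duration.",
                   service="ms-transferor",
                   endSecs=1800)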
class Requests(dict): """ Generic class for sending different types of HTTP Request to a given URL """ def __init__(self, url='http://localhost', idict=None): """ url should really be host - TODO fix that when have sufficient code coverage and change _getURLOpener if needed """ if not idict: idict = {} dict.__init__(self, idict) self.pycurl = idict.get('pycurl', None) self.capath = idict.get('capath', None) if self.pycurl: self.reqmgr = RequestHandler() #set up defaults self.setdefault("accept_type", 'text/html') self.setdefault("content_type", 'application/x-www-form-urlencoded') self.additionalHeaders = {} # check for basic auth early, as if found this changes the url urlComponent = sanitizeURL(url) if urlComponent['username'] is not None: self.addBasicAuth(\ urlComponent['username'], urlComponent['password']) url = urlComponent['url'] # remove user, password from url self.setdefault("host", url) # then update with the incoming dict self.update(idict) self['endpoint_components'] = urlparse.urlparse(self['host']) # If cachepath = None disable caching if 'cachepath' in idict and idict['cachepath'] is None: self["req_cache_path"] = None else: cache_dir = (self.cachePath(idict.get('cachepath'), \ idict.get('service_name'))) self["cachepath"] = cache_dir self["req_cache_path"] = os.path.join(cache_dir, '.cache') self.setdefault("timeout", 300) self.setdefault("logger", logging) check_server_url(self['host']) # and then get the URL opener self.setdefault("conn", self._getURLOpener()) def get(self, uri=None, data={}, incoming_headers={}, encode=True, decode=True, contentType=None): """ GET some data """ return self.makeRequest(uri, data, 'GET', incoming_headers, encode, decode, contentType) def post(self, uri=None, data={}, incoming_headers={}, encode=True, decode=True, contentType=None): """ POST some data """ return self.makeRequest(uri, data, 'POST', incoming_headers, encode, decode, contentType) def put(self, uri=None, data={}, incoming_headers={}, encode=True, decode=True, contentType=None): """ PUT some data """ return self.makeRequest(uri, data, 'PUT', incoming_headers, encode, decode, contentType) def delete(self, uri=None, data={}, incoming_headers={}, encode=True, decode=True, contentType=None): """ DELETE some data """ return self.makeRequest(uri, data, 'DELETE', incoming_headers, encode, decode, contentType) def makeRequest(self, uri=None, data={}, verb='GET', incoming_headers={}, encoder=True, decoder=True, contentType=None): """ Wrapper around request helper functions. """ if self.pycurl: result = self.makeRequest_pycurl(uri, data, verb, incoming_headers, encoder, decoder, contentType) else: result = self.makeRequest_httplib(uri, data, verb, incoming_headers, encoder, decoder, contentType) return result def makeRequest_pycurl(self, uri=None, params={}, verb='GET', incoming_headers={}, encoder=True, decoder=True, contentType=None): """ Make HTTP(s) request via pycurl library. Stay complaint with makeRequest_httplib method. 
""" ckey, cert = self.getKeyCert() capath = self.getCAPath() if not contentType: contentType = self['content_type'] headers = { "Content-type": contentType, "User-agent": "WMCore.Services.Requests/v001", "Accept": self['accept_type'] } for key in self.additionalHeaders.keys(): headers[key] = self.additionalHeaders[key] #And now overwrite any headers that have been passed into the call: headers.update(incoming_headers) url = self['host'] + uri response, data = self.reqmgr.request(url, params, headers, \ verb=verb, ckey=ckey, cert=cert, capath=capath, decode=decoder) return data, response.status, response.reason, response.fromcache def makeRequest_httplib(self, uri=None, data={}, verb='GET', incoming_headers={}, encoder=True, decoder=True, contentType=None): """ Make a request to the remote database. for a give URI. The type of request will determine the action take by the server (be careful with DELETE!). Data should be a dictionary of {dataname: datavalue}. Returns a tuple of the data from the server, decoded using the appropriate method the response status and the response reason, to be used in error handling. You can override the method to encode/decode your data by passing in an encoding/decoding function to this method. Your encoded data must end up as a string. """ #TODO: User agent should be: # $client/$client_version (CMS) # $http_lib/$http_lib_version $os/$os_version ($arch) if not contentType: contentType = self['content_type'] headers = { "Content-type": contentType, "User-agent": "WMCore.Services.Requests/v001", "Accept": self['accept_type'] } encoded_data = '' for key in self.additionalHeaders.keys(): headers[key] = self.additionalHeaders[key] #And now overwrite any headers that have been passed into the call: #WARNING: doesn't work with deplate so only accept gzip incoming_headers["accept-encoding"] = "gzip,identity" headers.update(incoming_headers) # httpib2 requires absolute url uri = self['host'] + uri # If you're posting an attachment, the data might not be a dict # please test against ConfigCache_t if you're unsure. #assert type(data) == type({}), \ # "makeRequest input data must be a dict (key/value pairs)" # There must be a better way to do this... def f(): """Dummy function""" pass if verb != 'GET' and data: if type(encoder) == type(self.get) or type(encoder) == type(f): encoded_data = encoder(data) elif encoder == False: # Don't encode the data more than we have to # we don't want to URL encode the data blindly, # that breaks POSTing attachments... 
ConfigCache_t #encoded_data = urllib.urlencode(data) # -- Andrew Melo 25/7/09 encoded_data = data else: # Either the encoder is set to True or it's junk, so use # self.encode encoded_data = self.encode(data) headers["Content-length"] = len(encoded_data) elif verb == 'GET' and data: #encode the data as a get string uri = "%s?%s" % (uri, urllib.urlencode(data, doseq=True)) headers["Content-length"] = str(len(encoded_data)) assert type(encoded_data) == type('string'), \ "Data in makeRequest is %s and not encoded to a string" \ % type(encoded_data) # httplib2 will allow sockets to close on remote end without retrying # try to send request - if this fails try again - should then succeed try: response, result = self['conn'].request(uri, method=verb, body=encoded_data, headers=headers) if response.status == 408: # timeout can indicate a socket error response, result = self['conn'].request(uri, method=verb, body=encoded_data, headers=headers) except (socket.error, AttributeError): # AttributeError implies initial connection error - need to close # & retry. httplib2 doesn't clear httplib state before next request # if this is threaded this may spoil things # only have one endpoint so don't need to determine which to shut [conn.close() for conn in self['conn'].connections.values()] self['conn'] = self._getURLOpener() # ... try again... if this fails propagate error to client try: response, result = self['conn'].request(uri, method=verb, body=encoded_data, headers=headers) except AttributeError: # socket/httplib really screwed up - nuclear option self['conn'].connections = {} raise socket.error, 'Error contacting: %s' \ % self.getDomainName() if response.status >= 400: e = HTTPException() setattr(e, 'req_data', encoded_data) setattr(e, 'req_headers', headers) setattr(e, 'url', uri) setattr(e, 'result', result) setattr(e, 'status', response.status) setattr(e, 'reason', response.reason) setattr(e, 'headers', response) raise e if type(decoder) == type(self.makeRequest) or type(decoder) == type(f): result = decoder(result) elif decoder != False: result = self.decode(result) #TODO: maybe just return result and response... return result, response.status, response.reason, response.fromcache def encode(self, data): """ encode data into some appropriate format, for now make it a string... """ return urllib.urlencode(data, doseq=1) def decode(self, data): """ decode data to some appropriate format, for now make it a string... """ return data.__str__() def cachePath(self, given_path, service_name): """Return cache location""" if not service_name: service_name = 'REQUESTS' top = self.cacheTopPath(given_path, service_name) # deal with multiple Services that have the same service running and # with multiple users for a given Service if self.getUserName() is None: cachepath = os.path.join(top, self['endpoint_components'].netloc) else: cachepath = os.path.join(top, '%s-%s' \ % (self.getUserName(), self.getDomainName())) try: # only we should be able to write to this dir os.makedirs(cachepath, stat.S_IRWXU) except OSError: if not os.path.isdir(cachepath): raise Permissions.owner_readwriteexec(cachepath) return cachepath def cacheTopPath(self, given_path, service_name): """Where to cache results? Logic: o If passed in take that o Is the environment variable "SERVICE_NAME"_CACHE_DIR defined? 
o Is WMCORE_CACHE_DIR set o Generate a temporary directory """ if given_path: return given_path user = str(os.getuid()) # append user id so users don't clobber each other lastbit = os.path.join('.wmcore_cache_%s' % user, service_name.lower()) for var in ('%s_CACHE_DIR' % service_name.upper(), 'WMCORE_CACHE_DIR'): if os.environ.get(var): firstbit = os.environ[var] break else: idir = tempfile.mkdtemp(prefix='.wmcore_cache_') # object to store temporary directory - cleaned up on destruction self['deleteCacheOnExit'] = TempDirectory(idir) return idir return os.path.join(firstbit, lastbit) def getDomainName(self): """Parse netloc info to get hostname""" return self['endpoint_components'].hostname def getUserName(self): """Parse netloc to get user""" return self['endpoint_components'].username def _getURLOpener(self): """ method getting a secure (HTTPS) connection """ key, cert = None, None if self['endpoint_components'].scheme == 'https': # only add certs to https requests # if we have a key/cert add to request, # if not proceed as not all https connections require them try: key, cert = self.getKeyCert() except Exception, ex: msg = 'No certificate or key found, authentication may fail' self['logger'].info(msg) self['logger'].debug(str(ex)) try: # disable validation as we don't have a single PEM with all ca's http = httplib2.Http(self['req_cache_path'], self['timeout'], disable_ssl_certificate_validation=True) except TypeError: # old httplib2 versions disable validation by default http = httplib2.Http(self['req_cache_path'], self['timeout']) # Domain must be just a hostname and port. self[host] is a URL currently if key or cert: http.add_certificate(key=key, cert=cert, domain='') return http
class Worker(object): """ """ def __init__(self, config, quiet): """ Initialise class members """ self.config = config.General self.max_files_per_block = self.config.max_files_per_block self.userProxy = self.config.opsProxy self.block_publication_timeout = self.config.block_closure_timeout self.lfn_map = {} self.force_publication = False self.force_failure = False #TODO: logger! def createLogdir(dirname): """ Create the directory dirname ignoring erors in case it exists. Exit if the directory cannot be created. """ try: os.mkdir(dirname) except OSError as ose: if ose.errno != 17: #ignore the "Directory already exists error" print(str(ose)) print("The task worker need to access the '%s' directory" % dirname) sys.exit(1) def setRootLogger(quiet, debug): """Sets the root logger with the desired verbosity level The root logger logs to logs/twlog.txt and every single logging instruction is propagated to it (not really nice to read) :arg bool quiet: it tells if a quiet logger is needed :arg bool debug: it tells if needs a verbose logger :return logger: a logger with the appropriate logger level.""" createLogdir('logs') createLogdir('logs/processes') createLogdir('logs/tasks') logHandler = MultiProcessingLog('logs/log.txt', when='midnight') logFormatter = logging.Formatter("%(asctime)s:%(levelname)s:%(module)s,%(lineno)d:%(message)s") logHandler.setFormatter(logFormatter) logging.getLogger().addHandler(logHandler) loglevel = logging.INFO if quiet: loglevel = logging.WARNING if debug: loglevel = logging.DEBUG logging.getLogger().setLevel(loglevel) logger = setProcessLogger("master") logger.debug("PID %s.", os.getpid()) logger.debug("Logging level initialized to %s.", loglevel) return logger self.cache_area = self.config.cache_area self.logger = setRootLogger(quiet, True) try: self.oracleDB = HTTPRequests(self.config.oracleDB, self.config.opsProxy, self.config.opsProxy) self.logger.debug('Contacting OracleDB:' + self.config.oracleDB) except: self.logger.exception('Failed when contacting Oracle') raise try: self.connection = RequestHandler(config={'timeout': 900, 'connecttimeout' : 900}) except Exception as ex: msg = "Error initializing the connection" msg += str(ex) msg += str(traceback.format_exc()) self.logger.debug(msg) def active_tasks(self, db): fileDoc = {} fileDoc['asoworker'] = self.config.asoworker fileDoc['subresource'] = 'acquirePublication' self.logger.debug("Retrieving publications from oracleDB") results = '' try: results = db.post(self.config.oracleFileTrans, data=encodeRequest(fileDoc)) except Exception as ex: self.logger.error("Failed to acquire publications \ from oracleDB: %s" %ex) return [] fileDoc = dict() fileDoc['asoworker'] = self.config.asoworker fileDoc['subresource'] = 'acquiredPublication' fileDoc['grouping'] = 0 fileDoc['limit'] = 100000 self.logger.debug("Retrieving max.100000 acquired puclications from oracleDB") result = [] try: results = db.get(self.config.oracleFileTrans, data=encodeRequest(fileDoc)) result.extend(oracleOutputMapping(results)) except Exception as ex: self.logger.error("Failed to acquire publications \ from oracleDB: %s" %ex) return [] self.logger.debug("publen: %s" % len(result)) self.logger.debug("%s acquired puclications retrieved" % len(result)) #TODO: join query for publisher (same of submitter) unique_tasks = [list(i) for i in set(tuple([x['username'], x['user_group'], x['user_role'], x['taskname']] ) for x in result if x['transfer_state'] == 3)] info = [] for task in unique_tasks: info.append([x for x in result if x['taskname'] == task[3]]) 
return zip(unique_tasks, info) def getPublDescFiles(self, workflow, lfn_ready): """ Download and read the files describing what needs to be published """ data = {} data['taskname'] = workflow data['filetype'] = 'EDM' out = [] # divide lfn per chunks, avoiding URI-too long exception def chunks(l, n): """ Yield successive n-sized chunks from l. :param l: list to splitt in chunks :param n: chunk size :return: yield the next list chunk """ for i in range(0, len(l), n): yield l[i:i + n] for lfn_ in chunks(lfn_ready, 50): data['lfn'] = lfn_ try: res = self.oracleDB.get('/crabserver/dev/filemetadata', data=encodeRequest(data, listParams=["lfn"])) res = res[0] except Exception as ex: self.logger.error("Error during metadata retrieving: %s" %ex) print(len(res['result'])) for obj in res['result']: if isinstance(obj, dict): out.append(obj) else: #print type(obj) out.append(json.loads(str(obj))) return out def algorithm(self): """ 1. Get a list of users with files to publish from the couchdb instance 2. For each user get a suitably sized input for publish 3. Submit the publish to a subprocess """ tasks = self.active_tasks(self.oracleDB) self.logger.debug('kicking off pool %s' % [x[0][3] for x in tasks]) processes = [] try: for task in tasks: p = Process(target=self.startSlave, args=(task,)) p.start() processes.append(p) for proc in processes: proc.join() except: self.logger.exception("Error during process mapping") def startSlave(self, task): # TODO: lock task! # - process logger logger = setProcessLogger(str(task[0][3])) logger.info("Process %s is starting. PID %s", task[0][3], os.getpid()) self.force_publication = False workflow = str(task[0][3]) wfnamemsg = "%s: " % (workflow) if len(task[1]) > self.max_files_per_block: self.force_publication = True msg = "All datasets have more than %s ready files." % (self.max_files_per_block) msg += " No need to retrieve task status nor last publication time." logger.info(wfnamemsg+msg) else: msg = "At least one dataset has less than %s ready files." % (self.max_files_per_block) logger.info(wfnamemsg+msg) # Retrieve the workflow status. If the status can not be retrieved, continue # with the next workflow. workflow_status = '' url = '/'.join(self.cache_area.split('/')[:-1]) + '/workflow' msg = "Retrieving status from %s" % (url) logger.info(wfnamemsg+msg) buf = cStringIO.StringIO() header = {"Content-Type":"application/json"} data = {'workflow': workflow}#, 'subresource': 'taskads'} try: _, res_ = self.connection.request(url, data, header, doseq=True, ckey=self.userProxy, cert=self.userProxy )# , verbose=True) # for debug except Exception as ex: if self.config.isOracle: logger.exception('Error retrieving status from cache.') return 0 msg = "Status retrieved from cache. Loading task status." logger.info(wfnamemsg+msg) try: buf.close() res = json.loads(res_) workflow_status = res['result'][0]['status'] msg = "Task status is %s." % workflow_status logger.info(wfnamemsg+msg) except ValueError: msg = "Workflow removed from WM." logger.error(wfnamemsg+msg) workflow_status = 'REMOVED' except Exception as ex: msg = "Error loading task status!" msg += str(ex) msg += str(traceback.format_exc()) logger.error(wfnamemsg+msg) # If the workflow status is terminal, go ahead and publish all the ready files # in the workflow. if workflow_status in ['COMPLETED', 'FAILED', 'KILLED', 'REMOVED']: self.force_publication = True if workflow_status in ['KILLED', 'REMOVED']: self.force_failure = True msg = "Considering task status as terminal. Will force publication." 
logger.info(wfnamemsg+msg) # Otherwise... else: msg = "Task status is not considered terminal." logger.info(wfnamemsg+msg) msg = "Getting last publication time." logger.info(wfnamemsg+msg) # Get when was the last time a publication was done for this workflow (this # should be more or less independent of the output dataset in case there are # more than one). last_publication_time = None data = {} data['workflow'] = workflow data['subresource'] = 'search' try: result = self.oracleDB.get(self.config.oracleFileTrans.replace('filetransfers', 'task'), data=encodeRequest(data)) logger.debug("task: %s " % str(result[0])) logger.debug("task: %s " % getColumn(result[0], 'tm_last_publication')) except Exception as ex: logger.error("Error during task doc retrieving: %s" %ex) if last_publication_time: date = oracleOutputMapping(result)['last_publication'] seconds = datetime.strptime(date, "%Y-%m-%d %H:%M:%S.%f").timetuple() last_publication_time = time.mktime(seconds) msg = "Last publication time: %s." % str(last_publication_time) logger.debug(wfnamemsg+msg) # If this is the first time a publication would be done for this workflow, go # ahead and publish. if not last_publication_time: self.force_publication = True msg = "There was no previous publication. Will force publication." logger.info(wfnamemsg+msg) # Otherwise... else: last = last_publication_time msg = "Last published block: %s" % (last) logger.debug(wfnamemsg+msg) # If the last publication was long time ago (> our block publication timeout), # go ahead and publish. now = int(time.time()) - time.timezone time_since_last_publication = now - last hours = int(time_since_last_publication/60/60) minutes = int((time_since_last_publication - hours*60*60)/60) timeout_hours = int(self.block_publication_timeout/60/60) timeout_minutes = int((self.block_publication_timeout - timeout_hours*60*60)/60) msg = "Last publication was %sh:%sm ago" % (hours, minutes) if time_since_last_publication > self.block_publication_timeout: self.force_publication = True msg += " (more than the timeout of %sh:%sm)." % (timeout_hours, timeout_minutes) msg += " Will force publication." else: msg += " (less than the timeout of %sh:%sm)." % (timeout_hours, timeout_minutes) msg += " Not enough to force publication." 
logger.info(wfnamemsg+msg) #logger.info(task[1]) try: if self.force_publication: # - get info active_ = [{'key': [x['username'], x['user_group'], x['user_role'], x['taskname']], 'value': [x['destination'], x['source_lfn'], x['destination_lfn'], x['input_dataset'], x['dbs_url'], x['last_update'] ]} for x in task[1] if x['transfer_state'] == 3 and x['publication_state'] not in [2, 3, 5]] lfn_ready = [] wf_jobs_endtime = [] pnn, input_dataset, input_dbs_url = "", "", "" for active_file in active_: job_end_time = active_file['value'][5] if job_end_time and self.config.isOracle: wf_jobs_endtime.append(int(job_end_time) - time.timezone) elif job_end_time: wf_jobs_endtime.append(int(time.mktime(time.strptime(str(job_end_time), '%Y-%m-%d %H:%M:%S'))) - time.timezone) source_lfn = active_file['value'][1] dest_lfn = active_file['value'][2] self.lfn_map[dest_lfn] = source_lfn if not pnn or not input_dataset or not input_dbs_url: pnn = str(active_file['value'][0]) input_dataset = str(active_file['value'][3]) input_dbs_url = str(active_file['value'][4]) lfn_ready.append(dest_lfn) userDN = '' username = task[0][0] user_group = "" if task[0][1]: user_group = task[0][1] user_role = "" if task[0][2]: user_role = task[0][2] logger.debug("Trying to get DN %s %s %s" % (username, user_group, user_role)) try: userDN = getDNFromUserName(username, logger) except Exception as ex: msg = "Error retrieving the user DN" msg += str(ex) msg += str(traceback.format_exc()) logger.error(msg) return 1 # Get metadata toPublish = [] publDescFiles_list = self.getPublDescFiles(workflow, lfn_ready) for file_ in active_: for _, doc in enumerate(publDescFiles_list): #logger.info(type(doc)) #logger.info(doc) if doc["lfn"] == file_["value"][2]: doc["User"] = username doc["Group"] = file_["key"][1] doc["Role"] = file_["key"][2] doc["UserDN"] = userDN doc["Destination"] = file_["value"][0] doc["SourceLFN"] = file_["value"][1] toPublish.append(doc) with open("/tmp/"+workflow+'.json', 'w') as outfile: json.dump(toPublish, outfile) logger.info(". publisher.sh %s" % (workflow)) subprocess.call(["/bin/bash", "/data/user/MicroASO/microPublisher/python/publisher.sh", workflow]) except: logger.exception("Exception!") return 0
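A note on the chunks() helper used in getPublDescFiles above: the ready-LFN list is cut into batches of 50 before each filemetadata query so that the encoded request stays below the URI length limit. A minimal, self-contained sketch of the same idiom (the sample LFNs are purely illustrative):

def chunks(seq, size):
    """Yield successive size-d slices of seq, mirroring the helper above."""
    for i in range(0, len(seq), size):
        yield seq[i:i + size]

# illustrative LFNs only; the real code passes the task's ready destination LFNs
lfns = ['/store/user/example/file_%d.root' % i for i in range(120)]
print([len(batch) for batch in chunks(lfns, 50)])   # -> [50, 50, 20]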
class PyCurlManager(unittest.TestCase): """Test pycurl_manager module""" def setUp(self): "initialization" self.mgr = RequestHandler() self.ckey = os.path.join(os.environ['HOME'], '.globus/userkey.pem') self.cert = os.path.join(os.environ['HOME'], '.globus/usercert.pem') self.cricheader = 'Date: Tue, 06 Nov 2018 14:50:29 GMT\r\nServer: Apache/2.4.6 (CentOS) OpenSSL/1.0.2k-fips mod_wsgi/3.4 Python/2.7.5 mod_gridsite/2.3.4\r\nVary: Cookie\r\nX-Frame-Options: SAMEORIGIN\r\nSet-Cookie: sessionid=bc1xu8zi5rbbsd5fgjuklb2tk2r3f6tw; expires=Sun, 11-Nov-2018 14:50:29 GMT; httponly; Max-Age=432000; Path=/\r\nContent-Length: 32631\r\nContent-Type: application/json\r\n\r\n' self.dbsheader = 'Date: Tue, 06 Nov 2018 14:39:07 GMT\r\nServer: Apache\r\nCMS-Server-Time: D=1503 t=1541515147806112\r\nTransfer-Encoding: chunked\r\nContent-Type: text/html\r\n\r\n' self.HTTPheader = 'Date: Tue, 06 Nov 2018 14:50:29 GMT\r\nServer: Apache/2.4.6 (CentOS) OpenSSL/1.0.2k-fips mod_wsgi/3.4 Python/2.7.5 mod_gridsite/2.3.4\r\nVary: Cookie\r\nX-Frame-Options: SAMEORIGIN\r\nSet-Cookie: GRIDHTTP_PASSCODE=2c6da9c96efa2ad0farhda; domain=cms-cric.cern.ch; path=/; secure\r\nContent-Length: 32631\r\nContent-Type: application/json\r\n\r\n' def testMulti(self): """ Test fetch of several urls at once, one of the url relies on CERN SSO. """ tfile = tempfile.NamedTemporaryFile() url1 = "https://cmsweb.cern.ch/dbs/prod/global/DBSReader/help" url2 = "https://cmsweb.cern.ch/dbs/prod/global/DBSReader/datatiers" url3 = "https://cms-gwmsmon.cern.ch/prodview/json/site_summary" cern_sso_cookie(url3, tfile.name, self.cert, self.ckey) cookie = {url3: tfile.name} urls = [url1, url2, url3] data = getdata(urls, self.ckey, self.cert, cookie=cookie) headers = 0 for row in data: if '200 OK' in row['headers']: headers += 1 self.assertTrue(headers, 3) def testSingle(self): """ Test single call to CERN SSO url. 
""" # test RequestHandler url = "https://cms-gwmsmon.cern.ch/prodview/json/site_summary" params = {} tfile = tempfile.NamedTemporaryFile() cern_sso_cookie(url, tfile.name, self.cert, self.ckey) cookie = {url: tfile.name} header, _ = self.mgr.request(url, params, cookie=cookie) self.assertTrue(header.status, 200) def testContinue(self): """ Test HTTP exit code 100 - Continue """ header = "HTTP/1.1 100 Continue\r\n" + self.dbsheader resp = ResponseHeader(header) self.assertIsNone(getattr(resp, "status", None)) self.assertEqual(resp.reason, "") self.assertFalse(resp.fromcache) self.assertIn("CMS-Server-Time", resp.header) self.assertIn("Date", resp.header) self.assertEqual(resp.header['Content-Type'], 'text/html') self.assertEqual(resp.header['Server'], 'Apache') self.assertEqual(resp.header['Transfer-Encoding'], 'chunked') return def testOK(self): """ Test HTTP exit code 200 - OK """ header = "HTTP/1.1 200 OK\r\n" + self.dbsheader resp = ResponseHeader(header) self.assertEqual(resp.status, 200) self.assertEqual(resp.reason, "OK") self.assertFalse(resp.fromcache) return def testForbidden(self): """ Test HTTP exit code 403 - Forbidden """ header = "HTTP/1.1 403 Forbidden\r\n" + self.dbsheader resp = ResponseHeader(header) self.assertEqual(resp.status, 403) self.assertEqual(resp.reason, "Forbidden") self.assertFalse(resp.fromcache) return def testOKCRIC(self): """ Test HTTP exit code 200 - OK for a CRIC response header """ header = "HTTP/1.1 200 OK\r\n" + self.cricheader resp = ResponseHeader(header) self.assertEqual(resp.status, 200) self.assertEqual(resp.reason, "OK") self.assertFalse(resp.fromcache) self.assertIn("Content-Length", resp.header) self.assertIn("Date", resp.header) self.assertIn("Server", resp.header) self.assertIn("sessionid", resp.header['Set-Cookie']) self.assertEqual(resp.header['Content-Type'], 'application/json') self.assertEqual(resp.header['Vary'], 'Cookie') self.assertEqual(resp.header['X-Frame-Options'], 'SAMEORIGIN') return def testUnavailableCRICHTTP(self): """ Test HTTP exit code 503 - Service Unavailable for a CRIC response header when it also contains a HTTP string in the Set-Cookie header section """ header = "HTTP/1.1 503 Service Unavailable\r\n" + self.HTTPheader resp = ResponseHeader(header) self.assertEqual(resp.status, 503) self.assertEqual(resp.reason, "Service Unavailable") self.assertFalse(resp.fromcache) self.assertIn("Content-Length", resp.header) self.assertIn("Date", resp.header) self.assertIn("Server", resp.header) self.assertIn("GRIDHTTP_PASSCODE", resp.header['Set-Cookie']) self.assertEqual(resp.header['Content-Type'], 'application/json') self.assertEqual(resp.header['Vary'], 'Cookie') self.assertEqual(resp.header['X-Frame-Options'], 'SAMEORIGIN') return
def getdata(url, params, headers=None): "Helper function to get data from the service" ckey, cert = getKeyCertFromEnv() mgr = RequestHandler() res = mgr.getdata(url, params=params, headers=headers, ckey=ckey, cert=cert) return json.loads(res)
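A possible call to the getdata helper above, using the ReqMgr2 info endpoint that also appears in the HEAD-request test further down; the endpoint and the empty parameter set are illustrative, and a valid certificate/key pair must be available in the environment for getKeyCertFromEnv to pick up:

url = 'https://cmsweb.cern.ch/reqmgr2/data/info'
headers = {'Accept': 'application/json'}
info = getdata(url, params={}, headers=headers)
print(list(info))   # top-level keys of the decoded JSON document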
class PyCurlManager(unittest.TestCase): """Test pycurl_manager module""" def setUp(self): "initialization" self.mgr = RequestHandler() #self.ckey = os.path.join(os.environ['HOME'], '.globus/userkey.pem') #self.cert = os.path.join(os.environ['HOME'], '.globus/usercert.pem') self.ckey = getKeyCertFromEnv()[0] self.cert = getKeyCertFromEnv()[1] self.cricheader = 'Date: Tue, 06 Nov 2018 14:50:29 GMT\r\nServer: Apache/2.4.6 (CentOS) OpenSSL/1.0.2k-fips mod_wsgi/3.4 Python/2.7.5 mod_gridsite/2.3.4\r\nVary: Cookie\r\nX-Frame-Options: SAMEORIGIN\r\nSet-Cookie: sessionid=bc1xu8zi5rbbsd5fgjuklb2tk2r3f6tw; expires=Sun, 11-Nov-2018 14:50:29 GMT; httponly; Max-Age=432000; Path=/\r\nContent-Length: 32631\r\nContent-Type: application/json\r\n\r\n' self.dbsheader = 'Date: Tue, 06 Nov 2018 14:39:07 GMT\r\nServer: Apache\r\nCMS-Server-Time: D=1503 t=1541515147806112\r\nTransfer-Encoding: chunked\r\nContent-Type: text/html\r\n\r\n' self.HTTPheader = 'Date: Tue, 06 Nov 2018 14:50:29 GMT\r\nServer: Apache/2.4.6 (CentOS) OpenSSL/1.0.2k-fips mod_wsgi/3.4 Python/2.7.5 mod_gridsite/2.3.4\r\nVary: Cookie\r\nX-Frame-Options: SAMEORIGIN\r\nSet-Cookie: GRIDHTTP_PASSCODE=2c6da9c96efa2ad0farhda; domain=cms-cric.cern.ch; path=/; secure\r\nContent-Length: 32631\r\nContent-Type: application/json\r\n\r\n' def testMulti(self): """ Test fetch of several urls at once, one of the url relies on CERN SSO. """ tfile = tempfile.NamedTemporaryFile() url1 = "https://cmsweb-prod.cern.ch/dbs/prod/global/DBSReader/help" url2 = "https://cmsweb-prod.cern.ch/dbs/prod/global/DBSReader/datatiers" url3 = "https://cms-gwmsmon.cern.ch/prodview/json/site_summary" cern_sso_cookie(url3, tfile.name, self.cert, self.ckey) cookie = {url3: tfile.name} urls = [url1, url2, url3] data = getdata(urls, self.ckey, self.cert, cookie=cookie) headers = 0 for row in data: if '200 OK' in row['headers']: headers += 1 self.assertTrue(headers, 3) def testSingle(self): """ Test single call to CERN SSO url. 
""" # test RequestHandler url = "https://cms-gwmsmon.cern.ch/prodview/json/site_summary" params = {} headers = {"Cache-Control": "no-cache"} tfile = tempfile.NamedTemporaryFile() cern_sso_cookie(url, tfile.name, self.cert, self.ckey) cookie = {url: tfile.name} header, _ = self.mgr.request(url, params, headers, cookie=cookie) self.assertTrue(header.status, 200) def testContinue(self): """ Test HTTP exit code 100 - Continue """ header = "HTTP/1.1 100 Continue\r\n" + self.dbsheader resp = ResponseHeader(header) self.assertIsNone(getattr(resp, "status", None)) self.assertEqual(resp.reason, "") self.assertFalse(resp.fromcache) self.assertIn("CMS-Server-Time", resp.header) self.assertIn("Date", resp.header) self.assertEqual(resp.header['Content-Type'], 'text/html') self.assertEqual(resp.header['Server'], 'Apache') self.assertEqual(resp.header['Transfer-Encoding'], 'chunked') return def testOK(self): """ Test HTTP exit code 200 - OK """ header = "HTTP/1.1 200 OK\r\n" + self.dbsheader resp = ResponseHeader(header) self.assertEqual(resp.status, 200) self.assertEqual(resp.reason, "OK") self.assertFalse(resp.fromcache) return def testForbidden(self): """ Test HTTP exit code 403 - Forbidden """ header = "HTTP/1.1 403 Forbidden\r\n" + self.dbsheader resp = ResponseHeader(header) self.assertEqual(resp.status, 403) self.assertEqual(resp.reason, "Forbidden") self.assertFalse(resp.fromcache) return def testOKCRIC(self): """ Test HTTP exit code 200 - OK for a CRIC response header """ header = "HTTP/1.1 200 OK\r\n" + self.cricheader resp = ResponseHeader(header) self.assertEqual(resp.status, 200) self.assertEqual(resp.reason, "OK") self.assertFalse(resp.fromcache) self.assertIn("Content-Length", resp.header) self.assertIn("Date", resp.header) self.assertIn("Server", resp.header) self.assertIn("sessionid", resp.header['Set-Cookie']) self.assertEqual(resp.header['Content-Type'], 'application/json') self.assertEqual(resp.header['Vary'], 'Cookie') self.assertEqual(resp.header['X-Frame-Options'], 'SAMEORIGIN') return def testUnavailableCRICHTTP(self): """ Test HTTP exit code 503 - Service Unavailable for a CRIC response header when it also contains a HTTP string in the Set-Cookie header section """ header = "HTTP/1.1 503 Service Unavailable\r\n" + self.HTTPheader resp = ResponseHeader(header) self.assertEqual(resp.status, 503) self.assertEqual(resp.reason, "Service Unavailable") self.assertFalse(resp.fromcache) self.assertIn("Content-Length", resp.header) self.assertIn("Date", resp.header) self.assertIn("Server", resp.header) self.assertIn("GRIDHTTP_PASSCODE", resp.header['Set-Cookie']) self.assertEqual(resp.header['Content-Type'], 'application/json') self.assertEqual(resp.header['Vary'], 'Cookie') self.assertEqual(resp.header['X-Frame-Options'], 'SAMEORIGIN') return def testHeadRequest(self): """ Test a HEAD request. """ params = {} headers = {} url = 'https://cmsweb.cern.ch/reqmgr2/data/info' res = self.mgr.getheader(url, params=params, headers=headers, ckey=self.ckey, cert=self.cert) self.assertEqual(res.getReason(), "OK") self.assertTrue(len(res.getHeader()) > 10) # Kubernetes cluster responds with a different Server header serverHeader = res.getHeaderKey("Server") self.assertTrue( serverHeader.startswith("nginx/") or serverHeader.startswith("CherryPy/") or serverHeader.startswith("openresty/"))
class Requests(dict): """ Generic class for sending different types of HTTP Request to a given URL """ def __init__(self, url='http://localhost', idict=None): """ url should really be host - TODO fix that when have sufficient code coverage and change _getURLOpener if needed """ if not idict: idict = {} dict.__init__(self, idict) self.pycurl = idict.get('pycurl', None) self.capath = idict.get('capath', None) if self.pycurl: self.reqmgr = RequestHandler() # set up defaults self.setdefault("accept_type", 'text/html') self.setdefault("content_type", 'application/x-www-form-urlencoded') self.additionalHeaders = {} # check for basic auth early, as if found this changes the url urlComponent = sanitizeURL(url) if urlComponent['username'] is not None: self.addBasicAuth( \ urlComponent['username'], urlComponent['password']) url = urlComponent['url'] # remove user, password from url self.setdefault("host", url) # then update with the incoming dict self.update(idict) self['endpoint_components'] = urlparse.urlparse(self['host']) # If cachepath = None disable caching if 'cachepath' in idict and idict['cachepath'] is None: self["req_cache_path"] = None else: cache_dir = (self.cachePath(idict.get('cachepath'), \ idict.get('service_name'))) self["cachepath"] = cache_dir self["req_cache_path"] = os.path.join(cache_dir, '.cache') self.setdefault("timeout", 300) self.setdefault("logger", logging) check_server_url(self['host']) def get(self, uri=None, data={}, incoming_headers={}, encode=True, decode=True, contentType=None): """ GET some data """ return self.makeRequest(uri, data, 'GET', incoming_headers, encode, decode, contentType) def post(self, uri=None, data={}, incoming_headers={}, encode=True, decode=True, contentType=None): """ POST some data """ return self.makeRequest(uri, data, 'POST', incoming_headers, encode, decode, contentType) def put(self, uri=None, data={}, incoming_headers={}, encode=True, decode=True, contentType=None): """ PUT some data """ return self.makeRequest(uri, data, 'PUT', incoming_headers, encode, decode, contentType) def delete(self, uri=None, data={}, incoming_headers={}, encode=True, decode=True, contentType=None): """ DELETE some data """ return self.makeRequest(uri, data, 'DELETE', incoming_headers, encode, decode, contentType) def makeRequest(self, uri=None, data={}, verb='GET', incoming_headers={}, encoder=True, decoder=True, contentType=None): """ Wrapper around request helper functions. """ if self.pycurl: result = self.makeRequest_pycurl(uri, data, verb, incoming_headers, encoder, decoder, contentType) else: result = self.makeRequest_httplib(uri, data, verb, incoming_headers, encoder, decoder, contentType) return result def makeRequest_pycurl(self, uri=None, params={}, verb='GET', incoming_headers={}, encoder=True, decoder=True, contentType=None): """ Make HTTP(s) request via pycurl library. Stay complaint with makeRequest_httplib method. 
""" ckey, cert = self.getKeyCert() capath = self.getCAPath() if not contentType: contentType = self['content_type'] headers = {"Content-type": contentType, "User-agent": "WMCore.Services.Requests/v001", "Accept": self['accept_type']} for key in self.additionalHeaders.keys(): headers[key] = self.additionalHeaders[key] # And now overwrite any headers that have been passed into the call: headers.update(incoming_headers) url = self['host'] + uri response, data = self.reqmgr.request(url, params, headers, \ verb=verb, ckey=ckey, cert=cert, capath=capath, decode=decoder) return data, response.status, response.reason, response.fromcache def makeRequest_httplib(self, uri=None, data={}, verb='GET', incoming_headers={}, encoder=True, decoder=True, contentType=None): """ Make a request to the remote database. for a give URI. The type of request will determine the action take by the server (be careful with DELETE!). Data should be a dictionary of {dataname: datavalue}. Returns a tuple of the data from the server, decoded using the appropriate method the response status and the response reason, to be used in error handling. You can override the method to encode/decode your data by passing in an encoding/decoding function to this method. Your encoded data must end up as a string. """ # TODO: User agent should be: # $client/$client_version (CMS) # $http_lib/$http_lib_version $os/$os_version ($arch) if not contentType: contentType = self['content_type'] headers = {"Content-type": contentType, "User-agent": "WMCore.Services.Requests/v001", "Accept": self['accept_type']} encoded_data = '' for key in self.additionalHeaders.keys(): headers[key] = self.additionalHeaders[key] # And now overwrite any headers that have been passed into the call: # WARNING: doesn't work with deplate so only accept gzip incoming_headers["accept-encoding"] = "gzip,identity" headers.update(incoming_headers) # httpib2 requires absolute url uri = self['host'] + uri # If you're posting an attachment, the data might not be a dict # please test against ConfigCache_t if you're unsure. # assert type(data) == type({}), \ # "makeRequest input data must be a dict (key/value pairs)" if verb != 'GET' and data: if isinstance(encoder, (types.MethodType, types.FunctionType)): encoded_data = encoder(data) elif encoder == False: # Don't encode the data more than we have to # we don't want to URL encode the data blindly, # that breaks POSTing attachments... ConfigCache_t # encoded_data = urllib.urlencode(data) # -- Andrew Melo 25/7/09 encoded_data = data else: # Either the encoder is set to True or it's junk, so use # self.encode encoded_data = self.encode(data) headers["Content-length"] = len(encoded_data) elif verb == 'GET' and data: # encode the data as a get string uri = "%s?%s" % (uri, urllib.urlencode(data, doseq=True)) headers["Content-length"] = str(len(encoded_data)) assert isinstance(encoded_data, str), \ "Data in makeRequest is %s and not encoded to a string" % type(encoded_data) # httplib2 will allow sockets to close on remote end without retrying # try to send request - if this fails try again - should then succeed try: conn = self._getURLOpener() response, result = conn.request(uri, method=verb, body=encoded_data, headers=headers) if response.status == 408: # timeout can indicate a socket error raise socket.error except (socket.error, AttributeError): self['logger'].warn("Http request failed, retrying once again..") # AttributeError implies initial connection error - need to close # & retry. 
httplib2 doesn't clear httplib state before next request # if this is threaded this may spoil things # only have one endpoint so don't need to determine which to shut for con in conn.connections.values(): con.close() conn = self._getURLOpener() # ... try again... if this fails propagate error to client try: response, result = conn.request(uri, method=verb, body=encoded_data, headers=headers) except AttributeError: msg = traceback.format_exc() # socket/httplib really screwed up - nuclear option conn.connections = {} raise socket.error('Error contacting: %s: %s' % (self.getDomainName(), msg)) if response.status >= 400: e = HTTPException() setattr(e, 'req_data', encoded_data) setattr(e, 'req_headers', headers) setattr(e, 'url', uri) setattr(e, 'result', result) setattr(e, 'status', response.status) setattr(e, 'reason', response.reason) setattr(e, 'headers', response) raise e if isinstance(decoder, (types.MethodType, types.FunctionType)): result = decoder(result) elif decoder != False: result = self.decode(result) # TODO: maybe just return result and response... return result, response.status, response.reason, response.fromcache def encode(self, data): """ encode data into some appropriate format, for now make it a string... """ return urllib.urlencode(data, doseq=1) def decode(self, data): """ decode data to some appropriate format, for now make it a string... """ return data.__str__() def cachePath(self, given_path, service_name): """Return cache location""" if not service_name: service_name = 'REQUESTS' top = self.cacheTopPath(given_path, service_name) # deal with multiple Services that have the same service running and # with multiple users for a given Service if self.getUserName() is None: cachepath = os.path.join(top, self['endpoint_components'].netloc) else: cachepath = os.path.join(top, '%s-%s' % (self.getUserName(), self.getDomainName())) try: # only we should be able to write to this dir os.makedirs(cachepath, stat.S_IRWXU) except OSError: if not os.path.isdir(cachepath): raise Permissions.owner_readwriteexec(cachepath) return cachepath def cacheTopPath(self, given_path, service_name): """Where to cache results? Logic: o If passed in take that o Is the environment variable "SERVICE_NAME"_CACHE_DIR defined? 
o Is WMCORE_CACHE_DIR set o Generate a temporary directory """ if given_path: return given_path user = str(os.getuid()) # append user id so users don't clobber each other lastbit = os.path.join('.wmcore_cache_%s' % user, service_name.lower()) for var in ('%s_CACHE_DIR' % service_name.upper(), 'WMCORE_CACHE_DIR'): if os.environ.get(var): firstbit = os.environ[var] break else: idir = tempfile.mkdtemp(prefix='.wmcore_cache_') self['deleteCacheOnExit'] = TempDirectory(idir) return idir return os.path.join(firstbit, lastbit) def getDomainName(self): """Parse netloc info to get hostname""" return self['endpoint_components'].hostname def getUserName(self): """Parse netloc to get user""" return self['endpoint_components'].username def _getURLOpener(self): """ method getting a secure (HTTPS) connection """ import httplib2 key, cert = None, None if self['endpoint_components'].scheme == 'https': # only add certs to https requests # if we have a key/cert add to request, # if not proceed as not all https connections require them try: key, cert = self.getKeyCert() except Exception as ex: msg = 'No certificate or key found, authentication may fail' self['logger'].info(msg) self['logger'].debug(str(ex)) try: # disable validation as we don't have a single PEM with all ca's http = httplib2.Http(self['req_cache_path'], self['timeout'], disable_ssl_certificate_validation=True) except TypeError: # old httplib2 versions disable validation by default http = httplib2.Http(self['req_cache_path'], self['timeout']) # Domain must be just a hostname and port. self[host] is a URL currently if key or cert: http.add_certificate(key=key, cert=cert, domain='') return http def addBasicAuth(self, username, password): """Add basic auth headers to request""" auth_string = "Basic %s" % base64.encodestring('%s:%s' % ( username, password)).strip() self.additionalHeaders["Authorization"] = auth_string def getKeyCert(self): """ _getKeyCert_ Get the user credentials if they exist, otherwise throw an exception. This code was modified from DBSAPI/dbsHttpService.py """ cert = None key = None # Zeroth case is if the class has over ridden the key/cert and has it # stored in self if 'cert' in self and 'key' in self and self['cert'] and self['key']: key = self['key'] cert = self['cert'] # Now we're trying to guess what the right cert/key combo is... 
# First preference to HOST Certificate, This is how it is set in Tier0 elif 'X509_HOST_CERT' in os.environ: cert = os.environ['X509_HOST_CERT'] key = os.environ['X509_HOST_KEY'] # Second preference to User Proxy, very common elif 'X509_USER_PROXY' in os.environ and os.path.exists(os.environ['X509_USER_PROXY']): cert = os.environ['X509_USER_PROXY'] key = cert # Third preference to User Cert/Proxy combination elif 'X509_USER_CERT' in os.environ: cert = os.environ['X509_USER_CERT'] key = os.environ['X509_USER_KEY'] # TODO: only in linux, unix case, add other os case # look for proxy at default location /tmp/x509up_u$uid elif os.path.exists('/tmp/x509up_u' + str(os.getuid())): cert = '/tmp/x509up_u' + str(os.getuid()) key = cert # if interactive we can use an encrypted certificate elif sys.stdin.isatty(): if os.path.exists(os.environ['HOME'] + '/.globus/usercert.pem'): cert = os.environ['HOME'] + '/.globus/usercert.pem' if os.path.exists(os.environ['HOME'] + '/.globus/userkey.pem'): key = os.environ['HOME'] + '/.globus/userkey.pem' else: key = cert # Set but not found if key and cert: if not os.path.exists(cert) or not os.path.exists(key): raise WMException('Request requires a host certificate and key', "WMCORE-11") # All looks OK, still doesn't guarantee proxy's validity etc. return key, cert def getCAPath(self): """ _getCAPath_ Return the path of the CA certificates. The check is loose in the pycurl_manager: if capath is None then the server identity is not verified. To enable this check you need to set either the X509_CERT_DIR variable or the cacert key of the request. """ cacert = None if 'capath' in self: cacert = self['capath'] elif "X509_CERT_DIR" in os.environ: cacert = os.environ["X509_CERT_DIR"] return cacert def uploadFile(self, fileName, url, fieldName='file1', params=[], verb='POST'): """ Upload a file with curl streaming it directly from disk """ ckey, cert = self.getKeyCert() capath = self.getCAPath() import pycurl c = pycurl.Curl() if verb == 'POST': c.setopt(c.POST, 1) elif verb == 'PUT': c.setopt(pycurl.CUSTOMREQUEST, 'PUT') else: raise HTTPException("Verb %s not supported for upload." 
% verb) c.setopt(c.URL, url) fullParams = [(fieldName, (c.FORM_FILE, fileName))] fullParams.extend(params) c.setopt(c.HTTPPOST, fullParams) bbuf = StringIO.StringIO() hbuf = StringIO.StringIO() c.setopt(pycurl.WRITEFUNCTION, bbuf.write) c.setopt(pycurl.HEADERFUNCTION, hbuf.write) if capath: c.setopt(pycurl.CAPATH, capath) c.setopt(pycurl.SSL_VERIFYPEER, True) else: c.setopt(pycurl.SSL_VERIFYPEER, False) if ckey: c.setopt(pycurl.SSLKEY, ckey) if cert: c.setopt(pycurl.SSLCERT, cert) c.perform() hres = hbuf.getvalue() bres = bbuf.getvalue() rh = ResponseHeader(hres) c.close() if rh.status < 200 or rh.status >= 300: exc = HTTPException(bres) setattr(exc, 'req_data', fullParams) setattr(exc, 'url', url) setattr(exc, 'result', bres) setattr(exc, 'status', rh.status) setattr(exc, 'reason', rh.reason) setattr(exc, 'headers', rh.header) raise exc return bres def downloadFile(self, fileName, url): """ Download a file with curl streaming it directly to disk """ ckey, cert = self.getKeyCert() capath = self.getCAPath() import pycurl from WMCore.Services.pycurl_manager import ResponseHeader hbuf = StringIO.StringIO() with open(fileName, "wb") as fp: curl = pycurl.Curl() curl.setopt(pycurl.URL, url) curl.setopt(pycurl.WRITEDATA, fp) curl.setopt(pycurl.HEADERFUNCTION, hbuf.write) if capath: curl.setopt(pycurl.CAPATH, capath) curl.setopt(pycurl.SSL_VERIFYPEER, True) else: curl.setopt(pycurl.SSL_VERIFYPEER, False) if ckey: curl.setopt(pycurl.SSLKEY, ckey) if cert: curl.setopt(pycurl.SSLCERT, cert) curl.setopt(pycurl.FOLLOWLOCATION, 1) curl.perform() curl.close() header = ResponseHeader(hbuf.getvalue()) if header.status < 200 or header.status >= 300: raise RuntimeError('Reading %s failed with code %s' % (url, header.status)) return fileName, header
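makeRequest (and therefore get/post/put/delete) returns a four-tuple: the decoded data, the HTTP status, the reason string and a flag telling whether the answer came from the local cache. A minimal usage sketch, assuming the pycurl code path and an illustrative host/URI:

req = Requests('https://cmsweb.cern.ch', {'pycurl': True, 'accept_type': 'application/json'})
data, status, reason, fromcache = req.get('/reqmgr2/data/info')
if status == 200:
    print(reason, fromcache)   # 'OK', and False on a fresh (non-cached) call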
class Requests(dict): """ Generic class for sending different types of HTTP Request to a given URL """ def __init__(self, url='http://localhost', idict=None): """ url should really be host - TODO fix that when have sufficient code coverage and change _getURLOpener if needed """ if not idict: idict = {} dict.__init__(self, idict) self.pycurl = idict.get('pycurl', None) self.capath = idict.get('capath', None) if self.pycurl: self.reqmgr = RequestHandler() # set up defaults self.setdefault("accept_type", 'text/html') self.setdefault("content_type", 'application/x-www-form-urlencoded') self.additionalHeaders = {} # check for basic auth early, as if found this changes the url urlComponent = sanitizeURL(url) if urlComponent['username'] is not None: self.addBasicAuth(urlComponent['username'], urlComponent['password']) url = urlComponent['url'] # remove user, password from url self.setdefault("host", url) # then update with the incoming dict self.update(idict) self['endpoint_components'] = urlparse.urlparse(self['host']) # If cachepath = None disable caching if 'cachepath' in idict and idict['cachepath'] is None: self["req_cache_path"] = None else: cache_dir = (self.cachePath(idict.get('cachepath'), idict.get('service_name'))) self["cachepath"] = cache_dir self["req_cache_path"] = os.path.join(cache_dir, '.cache') self.setdefault("cert", None) self.setdefault("key", None) self.setdefault('capath', None) self.setdefault("timeout", 300) self.setdefault("logger", logging) check_server_url(self['host']) def get(self, uri=None, data={}, incoming_headers={}, encode=True, decode=True, contentType=None): """ GET some data """ return self.makeRequest(uri, data, 'GET', incoming_headers, encode, decode, contentType) def post(self, uri=None, data={}, incoming_headers={}, encode=True, decode=True, contentType=None): """ POST some data """ return self.makeRequest(uri, data, 'POST', incoming_headers, encode, decode, contentType) def put(self, uri=None, data={}, incoming_headers={}, encode=True, decode=True, contentType=None): """ PUT some data """ return self.makeRequest(uri, data, 'PUT', incoming_headers, encode, decode, contentType) def delete(self, uri=None, data={}, incoming_headers={}, encode=True, decode=True, contentType=None): """ DELETE some data """ return self.makeRequest(uri, data, 'DELETE', incoming_headers, encode, decode, contentType) def makeRequest(self, uri=None, data={}, verb='GET', incoming_headers={}, encoder=True, decoder=True, contentType=None): """ Wrapper around request helper functions. """ if self.pycurl: result = self.makeRequest_pycurl(uri, data, verb, incoming_headers, encoder, decoder, contentType) else: result = self.makeRequest_httplib(uri, data, verb, incoming_headers, encoder, decoder, contentType) return result def makeRequest_pycurl(self, uri=None, params={}, verb='GET', incoming_headers={}, encoder=True, decoder=True, contentType=None): """ Make HTTP(s) request via pycurl library. Stay complaint with makeRequest_httplib method. 
""" ckey, cert = self.getKeyCert() capath = self.getCAPath() if not contentType: contentType = self['content_type'] headers = { "Content-type": contentType, "User-agent": "WMCore.Services.Requests/v001", "Accept": self['accept_type'] } for key in self.additionalHeaders.keys(): headers[key] = self.additionalHeaders[key] # And now overwrite any headers that have been passed into the call: headers.update(incoming_headers) url = self['host'] + uri response, data = self.reqmgr.request(url, params, headers, verb=verb, ckey=ckey, cert=cert, capath=capath, decode=decoder) return data, response.status, response.reason, response.fromcache def makeRequest_httplib(self, uri=None, data={}, verb='GET', incoming_headers={}, encoder=True, decoder=True, contentType=None): """ Make a request to the remote database. for a give URI. The type of request will determine the action take by the server (be careful with DELETE!). Data should be a dictionary of {dataname: datavalue}. Returns a tuple of the data from the server, decoded using the appropriate method the response status and the response reason, to be used in error handling. You can override the method to encode/decode your data by passing in an encoding/decoding function to this method. Your encoded data must end up as a string. """ # TODO: User agent should be: # $client/$client_version (CMS) # $http_lib/$http_lib_version $os/$os_version ($arch) if not contentType: contentType = self['content_type'] headers = { "Content-type": contentType, "User-agent": "WMCore.Services.Requests/v001", "Accept": self['accept_type'] } encoded_data = '' for key in self.additionalHeaders.keys(): headers[key] = self.additionalHeaders[key] # And now overwrite any headers that have been passed into the call: # WARNING: doesn't work with deplate so only accept gzip incoming_headers["accept-encoding"] = "gzip,identity" headers.update(incoming_headers) # httpib2 requires absolute url uri = self['host'] + uri # If you're posting an attachment, the data might not be a dict # please test against ConfigCache_t if you're unsure. # assert type(data) == type({}), \ # "makeRequest input data must be a dict (key/value pairs)" if verb != 'GET' and data: if isinstance(encoder, (types.MethodType, types.FunctionType)): encoded_data = encoder(data) elif encoder == False: # Don't encode the data more than we have to # we don't want to URL encode the data blindly, # that breaks POSTing attachments... ConfigCache_t # encoded_data = urllib.urlencode(data) # -- Andrew Melo 25/7/09 encoded_data = data else: # Either the encoder is set to True or it's junk, so use # self.encode encoded_data = self.encode(data) headers["Content-length"] = len(encoded_data) elif verb == 'GET' and data: # encode the data as a get string uri = "%s?%s" % (uri, urllib.urlencode(data, doseq=True)) headers["Content-length"] = str(len(encoded_data)) # PY3 needed for compatibility because str under futurize is not a string. 
Can be just str in Py3 only # PY3 Don't let futurize change this assert isinstance(encoded_data, (str, basestring)), \ "Data in makeRequest is %s and not encoded to a string" % type(encoded_data) # httplib2 will allow sockets to close on remote end without retrying # try to send request - if this fails try again - should then succeed try: conn = self._getURLOpener() response, result = conn.request(uri, method=verb, body=encoded_data, headers=headers) if response.status == 408: # timeout can indicate a socket error raise socket.error except ServerNotFoundError as ex: # DNS cannot resolve this domain name, let's call it 'Service Unavailable' e = HTTPException() setattr(e, 'url', uri) setattr(e, 'status', 503) setattr(e, 'reason', 'Service Unavailable') setattr(e, 'result', str(ex)) raise e except (socket.error, AttributeError): self['logger'].warn("Http request failed, retrying once again..") # AttributeError implies initial connection error - need to close # & retry. httplib2 doesn't clear httplib state before next request # if this is threaded this may spoil things # only have one endpoint so don't need to determine which to shut for con in conn.connections.values(): con.close() conn = self._getURLOpener() # ... try again... if this fails propagate error to client try: response, result = conn.request(uri, method=verb, body=encoded_data, headers=headers) except AttributeError: msg = traceback.format_exc() # socket/httplib really screwed up - nuclear option conn.connections = {} raise socket.error('Error contacting: %s: %s' % (self.getDomainName(), msg)) if response.status >= 400: e = HTTPException() setattr(e, 'req_data', encoded_data) setattr(e, 'req_headers', headers) setattr(e, 'url', uri) setattr(e, 'result', result) setattr(e, 'status', response.status) setattr(e, 'reason', response.reason) setattr(e, 'headers', response) raise e if isinstance(decoder, (types.MethodType, types.FunctionType)): result = decoder(result) elif decoder != False: result = self.decode(result) # TODO: maybe just return result and response... return result, response.status, response.reason, response.fromcache def encode(self, data): """ encode data into some appropriate format, for now make it a string... """ return urllib.urlencode(data, doseq=1) def decode(self, data): """ decode data to some appropriate format, for now make it a string... """ return data.__str__() def cachePath(self, given_path, service_name): """Return cache location""" if not service_name: service_name = 'REQUESTS' top = self.cacheTopPath(given_path, service_name) # deal with multiple Services that have the same service running and # with multiple users for a given Service if self.getUserName() is None: cachepath = os.path.join(top, self['endpoint_components'].netloc) else: cachepath = os.path.join( top, '%s-%s' % (self.getUserName(), self.getDomainName())) try: # only we should be able to write to this dir os.makedirs(cachepath, stat.S_IRWXU) except OSError: if not os.path.isdir(cachepath): raise Permissions.owner_readwriteexec(cachepath) return cachepath def cacheTopPath(self, given_path, service_name): """Where to cache results? Logic: o If passed in take that o Is the environment variable "SERVICE_NAME"_CACHE_DIR defined? 
o Is WMCORE_CACHE_DIR set o Generate a temporary directory """ if given_path: return given_path user = str(os.getuid()) # append user id so users don't clobber each other lastbit = os.path.join('.wmcore_cache_%s' % user, service_name.lower()) for var in ('%s_CACHE_DIR' % service_name.upper(), 'WMCORE_CACHE_DIR'): if os.environ.get(var): firstbit = os.environ[var] break else: idir = tempfile.mkdtemp(prefix='.wmcore_cache_') self['deleteCacheOnExit'] = TempDirectory(idir) return idir return os.path.join(firstbit, lastbit) def getDomainName(self): """Parse netloc info to get hostname""" return self['endpoint_components'].hostname def getUserName(self): """Parse netloc to get user""" return self['endpoint_components'].username def _getURLOpener(self): """ method getting a secure (HTTPS) connection """ import httplib2 key, cert = None, None if self['endpoint_components'].scheme == 'https': # only add certs to https requests # if we have a key/cert add to request, # if not proceed as not all https connections require them try: key, cert = self.getKeyCert() except Exception as ex: msg = 'No certificate or key found, authentication may fail' self['logger'].info(msg) self['logger'].debug(str(ex)) try: # disable validation as we don't have a single PEM with all ca's http = httplib2.Http(self['req_cache_path'], self['timeout'], disable_ssl_certificate_validation=True) except TypeError: # old httplib2 versions disable validation by default http = httplib2.Http(self['req_cache_path'], self['timeout']) # Domain must be just a hostname and port. self[host] is a URL currently if key or cert: http.add_certificate(key=key, cert=cert, domain='') return http def addBasicAuth(self, username, password): """Add basic auth headers to request""" auth_string = "Basic %s" % base64.encodestring( '%s:%s' % (username, password)).strip() self.additionalHeaders["Authorization"] = auth_string def getKeyCert(self): """ _getKeyCert_ Get the user credentials if they exist, otherwise throw an exception. This code was modified from DBSAPI/dbsHttpService.py """ # Zeroth case is if the class has over ridden the key/cert and has it # stored in self if self['cert'] and self['key']: key = self['key'] cert = self['cert'] else: key, cert = getKeyCertFromEnv() # Set but not found if key is None or cert is None: raise WMException('Request requires a host certificate and key', "WMCORE-11") # All looks OK, still doesn't guarantee proxy's validity etc. return key, cert def getCAPath(self): """ _getCAPath_ Return the path of the CA certificates. The check is loose in the pycurl_manager: is capath == None then the server identity is not verified. To enable this check you need to set either the X509_CERT_DIR variable or the cacert key of the request. """ capath = self['capath'] if not capath: capath = getCAPathFromEnv() return capath def uploadFile(self, fileName, url, fieldName='file1', params=[], verb='POST'): """ Upload a file with curl streaming it directly from disk """ ckey, cert = self.getKeyCert() capath = self.getCAPath() import pycurl c = pycurl.Curl() if verb == 'POST': c.setopt(c.POST, 1) elif verb == 'PUT': c.setopt(pycurl.CUSTOMREQUEST, 'PUT') else: raise HTTPException("Verb %s not sopported for upload." 
% verb) c.setopt(c.URL, url) fullParams = [(fieldName, (c.FORM_FILE, fileName))] fullParams.extend(params) c.setopt(c.HTTPPOST, fullParams) bbuf = StringIO.StringIO() hbuf = StringIO.StringIO() c.setopt(pycurl.WRITEFUNCTION, bbuf.write) c.setopt(pycurl.HEADERFUNCTION, hbuf.write) if capath: c.setopt(pycurl.CAPATH, capath) c.setopt(pycurl.SSL_VERIFYPEER, True) else: c.setopt(pycurl.SSL_VERIFYPEER, False) if ckey: c.setopt(pycurl.SSLKEY, ckey) if cert: c.setopt(pycurl.SSLCERT, cert) c.perform() hres = hbuf.getvalue() bres = bbuf.getvalue() rh = ResponseHeader(hres) c.close() if rh.status < 200 or rh.status >= 300: exc = HTTPException(bres) setattr(exc, 'req_data', fullParams) setattr(exc, 'url', url) setattr(exc, 'result', bres) setattr(exc, 'status', rh.status) setattr(exc, 'reason', rh.reason) setattr(exc, 'headers', rh.header) raise exc return bres def downloadFile(self, fileName, url): """ Download a file with curl streaming it directly to disk """ ckey, cert = self.getKeyCert() capath = self.getCAPath() import pycurl hbuf = StringIO.StringIO() with open(fileName, "wb") as fp: curl = pycurl.Curl() curl.setopt(pycurl.URL, url) curl.setopt(pycurl.WRITEDATA, fp) curl.setopt(pycurl.HEADERFUNCTION, hbuf.write) if capath: curl.setopt(pycurl.CAPATH, capath) curl.setopt(pycurl.SSL_VERIFYPEER, True) else: curl.setopt(pycurl.SSL_VERIFYPEER, False) if ckey: curl.setopt(pycurl.SSLKEY, ckey) if cert: curl.setopt(pycurl.SSLCERT, cert) curl.setopt(pycurl.FOLLOWLOCATION, 1) curl.perform() curl.close() header = ResponseHeader(hbuf.getvalue()) if header.status < 200 or header.status >= 300: raise RuntimeError('Reading %s failed with code %s' % (url, header.status)) return fileName, header
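downloadFile above streams a URL straight to disk through pycurl and returns the local file name together with the parsed response header, raising RuntimeError for anything outside the 2xx range. A small sketch under those assumptions (both the target file and the URL are illustrative):

req = Requests('https://cmsweb.cern.ch', {'pycurl': True})
try:
    fname, header = req.downloadFile('/tmp/info.json', 'https://cmsweb.cern.ch/reqmgr2/data/info')
    print(fname, header.status)
except RuntimeError as exc:
    print('download failed: %s' % exc)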
class Requests(dict): """ Generic class for sending different types of HTTP Request to a given URL """ def __init__(self, url = 'http://localhost', idict=None): """ url should really be host - TODO fix that when have sufficient code coverage and change _getURLOpener if needed """ if not idict: idict = {} dict.__init__(self, idict) self.pycurl = idict.get('pycurl', None) if self.pycurl: self.reqmgr = RequestHandler() #set up defaults self.setdefault("accept_type", 'text/html') self.setdefault("content_type", 'application/x-www-form-urlencoded') self.additionalHeaders = {} # check for basic auth early, as if found this changes the url urlComponent = sanitizeURL(url) if urlComponent['username'] is not None: self.addBasicAuth(\ urlComponent['username'], urlComponent['password']) url = urlComponent['url'] # remove user, password from url self.setdefault("host", url) # then update with the incoming dict self.update(idict) self['endpoint_components'] = urlparse.urlparse(self['host']) # If cachepath = None disable caching if 'cachepath' in idict and idict['cachepath'] is None: self["req_cache_path"] = None else: cache_dir = (self.cachePath(idict.get('cachepath'), \ idict.get('service_name'))) self["cachepath"] = cache_dir self["req_cache_path"] = os.path.join(cache_dir, '.cache') self.setdefault("timeout", 30) self.setdefault("logger", logging) check_server_url(self['host']) # and then get the URL opener self.setdefault("conn", self._getURLOpener()) def get(self, uri=None, data={}, incoming_headers={}, encode = True, decode=True, contentType=None): """ GET some data """ return self.makeRequest(uri, data, 'GET', incoming_headers, encode, decode, contentType) def post(self, uri=None, data={}, incoming_headers={}, encode = True, decode=True, contentType=None): """ POST some data """ return self.makeRequest(uri, data, 'POST', incoming_headers, encode, decode, contentType) def put(self, uri=None, data={}, incoming_headers={}, encode = True, decode=True, contentType=None): """ PUT some data """ return self.makeRequest(uri, data, 'PUT', incoming_headers, encode, decode, contentType) def delete(self, uri=None, data={}, incoming_headers={}, encode = True, decode=True, contentType=None): """ DELETE some data """ return self.makeRequest(uri, data, 'DELETE', incoming_headers, encode, decode, contentType) def makeRequest(self, uri=None, data={}, verb='GET', incoming_headers={}, encoder=True, decoder=True, contentType=None): """ Wrapper around request helper functions. """ if self.pycurl: result = self.makeRequest_pycurl(uri, data, verb, incoming_headers, encoder, decoder, contentType) else: result = self.makeRequest_httplib(uri, data, verb, incoming_headers, encoder, decoder, contentType) return result def makeRequest_pycurl(self, uri=None, params={}, verb='GET', incoming_headers={}, encoder=True, decoder=True, contentType=None): """ Make HTTP(s) request via pycurl library. Stay complaint with makeRequest_httplib method. 
""" ckey, cert = self.getKeyCert() capath = self.getCAPath() if not contentType: contentType = self['content_type'] headers = {"Content-type": contentType, "User-agent": "WMCore.Services.Requests/v001", "Accept": self['accept_type']} for key in self.additionalHeaders.keys(): headers[key] = self.additionalHeaders[key] #And now overwrite any headers that have been passed into the call: headers.update(incoming_headers) url = self['host'] + uri response, data = self.reqmgr.request(url, params, headers, \ verb=verb, ckey=ckey, cert=cert, capath=capath, decode=decoder) return data, response.status, response.reason, response.fromcache def makeRequest_httplib(self, uri=None, data={}, verb='GET', incoming_headers={}, encoder=True, decoder=True, contentType=None): """ Make a request to the remote database. for a give URI. The type of request will determine the action take by the server (be careful with DELETE!). Data should be a dictionary of {dataname: datavalue}. Returns a tuple of the data from the server, decoded using the appropriate method the response status and the response reason, to be used in error handling. You can override the method to encode/decode your data by passing in an encoding/decoding function to this method. Your encoded data must end up as a string. """ #TODO: User agent should be: # $client/$client_version (CMS) # $http_lib/$http_lib_version $os/$os_version ($arch) if not contentType: contentType = self['content_type'] headers = {"Content-type": contentType, "User-agent": "WMCore.Services.Requests/v001", "Accept": self['accept_type']} encoded_data = '' for key in self.additionalHeaders.keys(): headers[key] = self.additionalHeaders[key] #And now overwrite any headers that have been passed into the call: headers.update(incoming_headers) # httpib2 requires absolute url uri = self['host'] + uri # If you're posting an attachment, the data might not be a dict # please test against ConfigCache_t if you're unsure. #assert type(data) == type({}), \ # "makeRequest input data must be a dict (key/value pairs)" # There must be a better way to do this... def f(): """Dummy function""" pass if verb != 'GET' and data: if type(encoder) == type(self.get) or type(encoder) == type(f): encoded_data = encoder(data) elif encoder == False: # Don't encode the data more than we have to # we don't want to URL encode the data blindly, # that breaks POSTing attachments... ConfigCache_t #encoded_data = urllib.urlencode(data) # -- Andrew Melo 25/7/09 encoded_data = data else: # Either the encoder is set to True or it's junk, so use # self.encode encoded_data = self.encode(data) headers["Content-length"] = len(encoded_data) elif verb == 'GET' and data: #encode the data as a get string uri = "%s?%s" % (uri, urllib.urlencode(data, doseq=True)) headers["Content-length"] = str(len(encoded_data)) assert type(encoded_data) == type('string'), \ "Data in makeRequest is %s and not encoded to a string" \ % type(encoded_data) # httplib2 will allow sockets to close on remote end without retrying # try to send request - if this fails try again - should then succeed try: response, result = self['conn'].request(uri, method = verb, body = encoded_data, headers = headers) if response.status == 408: # timeout can indicate a socket error response, result = self['conn'].request(uri, method = verb, body = encoded_data, headers = headers) except (socket.error, AttributeError): # AttributeError implies initial connection error - need to close # & retry. 
httplib2 doesn't clear httplib state before next request # if this is threaded this may spoil things # only have one endpoint so don't need to determine which to shut [conn.close() for conn in self['conn'].connections.values()] self['conn'] = self._getURLOpener() # ... try again... if this fails propagate error to client try: response, result = self['conn'].request(uri, method = verb, body = encoded_data, headers = headers) except AttributeError: # socket/httplib really screwed up - nuclear option self['conn'].connections = {} raise socket.error, 'Error contacting: %s' \ % self.getDomainName() if response.status >= 400: e = HTTPException() setattr(e, 'req_data', encoded_data) setattr(e, 'req_headers', headers) setattr(e, 'url', uri) setattr(e, 'result', result) setattr(e, 'status', response.status) setattr(e, 'reason', response.reason) setattr(e, 'headers', response) raise e if type(decoder) == type(self.makeRequest) or type(decoder) == type(f): result = decoder(result) elif decoder != False: result = self.decode(result) #TODO: maybe just return result and response... return result, response.status, response.reason, response.fromcache def encode(self, data): """ encode data into some appropriate format, for now make it a string... """ return urllib.urlencode(data, doseq=1) def decode(self, data): """ decode data to some appropriate format, for now make it a string... """ return data.__str__() def cachePath(self, given_path, service_name): """Return cache location""" if not service_name: service_name = 'REQUESTS' top = self.cacheTopPath(given_path, service_name) # deal with multiple Services that have the same service running and # with multiple users for a given Service if self.getUserName() is None: cachepath = os.path.join(top, self['endpoint_components'].netloc) else: cachepath = os.path.join(top, '%s-%s' \ % (self.getUserName(), self.getDomainName())) try: # only we should be able to write to this dir os.makedirs(cachepath, stat.S_IRWXU) except OSError: if not os.path.isdir(cachepath): raise Permissions.owner_readwriteexec(cachepath) return cachepath def cacheTopPath(self, given_path, service_name): """Where to cache results? Logic: o If passed in take that o Is the environment variable "SERVICE_NAME"_CACHE_DIR defined? 
o Is WMCORE_CACHE_DIR set o Generate a temporary directory """ if given_path: return given_path user = str(os.getuid()) # append user id so users don't clobber each other lastbit = os.path.join('.wmcore_cache_%s' % user, service_name.lower()) for var in ('%s_CACHE_DIR' % service_name.upper(), 'WMCORE_CACHE_DIR'): if os.environ.get(var): firstbit = os.environ[var] break else: idir = tempfile.mkdtemp(prefix='.wmcore_cache_') # object to store temporary directory - cleaned up on destruction self['deleteCacheOnExit'] = TempDirectory(idir) return idir return os.path.join(firstbit, lastbit) def getDomainName(self): """Parse netloc info to get hostname""" return self['endpoint_components'].hostname def getUserName(self): """Parse netloc to get user""" return self['endpoint_components'].username def _getURLOpener(self): """ method getting a secure (HTTPS) connection """ key, cert = None, None if self['endpoint_components'].scheme == 'https': # only add certs to https requests # if we have a key/cert add to request, # if not proceed as not all https connections require them try: key, cert = self.getKeyCert() except Exception, ex: msg = 'No certificate or key found, authentication may fail' self['logger'].info(msg) self['logger'].debug(str(ex)) try: # disable validation as we don't have a single PEM with all ca's http = httplib2.Http(self['req_cache_path'], self['timeout'], disable_ssl_certificate_validation = True) except TypeError: # old httplib2 versions disable validation by default http = httplib2.Http(self['req_cache_path'], self['timeout']) # Domain must be just a hostname and port. self[host] is a URL currently if key or cert: http.add_certificate(key=key, cert=cert, domain='') return http
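All three Requests variants in this document strip credentials out of the URL early (sanitizeURL) and turn them into an Authorization header via addBasicAuth. The header value itself is plain HTTP basic auth; a standalone Python 3 sketch of the equivalent construction (base64.b64encode replacing the deprecated encodestring used above):

import base64

def basic_auth_header(username, password):
    # builds the value stored in self.additionalHeaders["Authorization"] above
    token = base64.b64encode(('%s:%s' % (username, password)).encode('utf-8')).decode('ascii')
    return 'Basic %s' % token

print(basic_auth_header('user', 'secret'))   # Basic dXNlcjpzZWNyZXQ=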
def setUp(self): "initialization" self.mgr = RequestHandler() self.ckey = os.path.join(os.environ['HOME'], '.globus/userkey.pem') self.cert = os.path.join(os.environ['HOME'], '.globus/usercert.pem')
class MSRuleCleaner(MSCore): """ MSRuleCleaner.py class provides the logic used to clean the Rucio block level data placement rules created by WMAgent. """ def __init__(self, msConfig, logger=None): """ Runs the basic setup and initialization for the MSRuleCleaner module :param msConfig: micro service configuration """ super(MSRuleCleaner, self).__init__(msConfig, logger=logger) self.msConfig.setdefault("verbose", True) self.msConfig.setdefault("interval", 60) self.msConfig.setdefault("services", ['ruleCleaner']) self.msConfig.setdefault("rucioWmaAccount", "wma_test") self.msConfig.setdefault("rucioMStrAccount", "wmcore_transferor") self.msConfig.setdefault('enableRealMode', False) self.mode = "RealMode" if self.msConfig['enableRealMode'] else "DryRunMode" self.curlMgr = RequestHandler() self.targetStatusRegex = re.compile(r'.*archived') self.logDB = LogDB(self.msConfig["logDBUrl"], self.msConfig["logDBReporter"], logger=self.logger) self.wmstatsSvc = WMStatsServer(self.msConfig['wmstatsUrl'], logger=self.logger) # Building all the Pipelines: pName = 'plineMSTrCont' self.plineMSTrCont = Pipeline(name=pName, funcLine=[Functor(self.setPlineMarker, pName), Functor(self.setParentDatasets), Functor(self.getRucioRules, 'container', self.msConfig['rucioMStrAccount']), Functor(self.cleanRucioRules)]) pName = 'plineMSTrBlock' self.plineMSTrBlock = Pipeline(name=pName, funcLine=[Functor(self.setPlineMarker, pName), Functor(self.setParentDatasets), Functor(self.getRucioRules, 'block', self.msConfig['rucioMStrAccount']), Functor(self.cleanRucioRules)]) pName = 'plineAgentCont' self.plineAgentCont = Pipeline(name=pName, funcLine=[Functor(self.setPlineMarker, pName), Functor(self.getRucioRules, 'container', self.msConfig['rucioWmaAccount']), Functor(self.cleanRucioRules)]) pName = 'plineAgentBlock' self.plineAgentBlock = Pipeline(name=pName, funcLine=[Functor(self.setPlineMarker, pName), Functor(self.getRucioRules, 'block', self.msConfig['rucioWmaAccount']), Functor(self.cleanRucioRules)]) pName = 'plineArchive' self.plineArchive = Pipeline(name=pName, funcLine=[Functor(self.setPlineMarker, pName), Functor(self.findTargetStatus), Functor(self.setClean), Functor(self.setArchivalDelayExpired), Functor(self.setLogDBClean), Functor(self.archive)]) # Building the different set of plines we will need later: # NOTE: The following are all the functional pipelines which are supposed to include # a cleanup function and report cleanup status in the MSRuleCleanerWflow object self.cleanuplines = [self.plineMSTrCont, self.plineMSTrBlock, self.plineAgentCont, self.plineAgentBlock] # Building an auxiliary list of cleanup pipeline names only: self.cleanupPipeNames = [pline.name for pline in self.cleanuplines] # Building lists of pipelines related only to Agents or MStransferror self.agentlines = [self.plineAgentCont, self.plineAgentBlock] self.mstrlines = [self.plineMSTrCont, self.plineMSTrBlock] # Initialization of the 'cleaned' and 'archived' counters: self.wfCounters = {'cleaned': {}, 'archived': {'normalArchived': 0, 'forceArchived': 0}} self.globalLocks = set() def getGlobalLocks(self): """ Fetches the list of 'globalLocks' from wmstats server and the list of 'parentLocks' from request manager. Stores/updates the unified set in the 'globalLocks' instance variable. Returns the resultant unified set. 
:return: A union set of the 'globalLocks' and the 'parentLocks' lists """ self.logger.info("Fetching globalLocks list from wmstats server.") try: globalLocks = set(self.wmstatsSvc.getGlobalLocks()) except Exception as ex: msg = "Failed to refresh global locks list for the current polling cycle. Error: %s " msg += "Skipping this polling cycle." self.logger.error(msg, str(ex)) raise ex self.logger.info("Fetching parentLocks list from reqmgr2 server.") try: parentLocks = set(self.reqmgr2.getParentLocks()) except Exception as ex: msg = "Failed to refresh parent locks list for the current polling cycle. Error: %s " msg += "Skipping this polling cycle." self.logger.error(msg, str(ex)) raise ex self.globalLocks = globalLocks | parentLocks return self.globalLocks def resetCounters(self): """ A simple function for zeroing the cleaned and archived counters. """ for pline in self.cleanuplines: self.wfCounters['cleaned'][pline.name] = 0 self.wfCounters['archived']['normalArchived'] = 0 self.wfCounters['archived']['forceArchived'] = 0 def execute(self, reqStatus): """ Executes the whole ruleCleaner logic :return: summary """ # start threads in MSManager which should call this method summary = dict(RULECLEANER_REPORT) self.currThread = current_thread() self.currThreadIdent = self.currThread.name self.updateReportDict(summary, "thread_id", self.currThreadIdent) self.resetCounters() self.logger.info("MSRuleCleaner is running in mode: %s.", self.mode) # Build the list of workflows to work on: try: requestRecords = {} for status in reqStatus: requestRecords.update(self.getRequestRecords(status)) except Exception as err: # general error msg = "Unknown exception while fetching requests from ReqMgr2. Error: %s" % str(err) self.logger.exception(msg) self.updateReportDict(summary, "error", msg) # Call _execute() and feed the relevant pipeline with the objects popped from requestRecords try: self.getGlobalLocks() totalNumRequests, cleanNumRequests, normalArchivedNumRequests, forceArchivedNumRequests = self._execute(requestRecords) msg = "\nNumber of processed workflows: %s." msg += "\nNumber of properly cleaned workflows: %s." msg += "\nNumber of normally archived workflows: %s." msg += "\nNumber of force archived workflows: %s." self.logger.info(msg, totalNumRequests, cleanNumRequests, normalArchivedNumRequests, forceArchivedNumRequests) self.updateReportDict(summary, "total_num_requests", totalNumRequests) self.updateReportDict(summary, "clean_num_requests", cleanNumRequests) self.updateReportDict(summary, "normal_archived_num_requests", normalArchivedNumRequests) self.updateReportDict(summary, "force_archived_num_requests", forceArchivedNumRequests) except Exception as ex: msg = "Unknown exception while running MSRuleCleaner thread. Error: %s" self.logger.exception(msg, str(ex)) self.updateReportDict(summary, "error", msg) return summary def _execute(self, reqRecords): """ Executes the MSRuleCleaner pipelines based on the workflow status :param reqRecords: A dictionary of request records to work on :return: a tuple with: the total number of processed workflows, the number of properly cleaned workflows, the number of normally archived workflows and the number of force archived workflows """ # NOTE: The Input Cleanup, the Block Level Cleanup and the Archival # Pipelines are executed sequentially in the above order.
# This way we assure ourselves that we archive only workflows # that have accomplished the needed cleanup cleanNumRequests = 0 totalNumRequests = 0 # Call the workflow dispatcher: for req in viewvalues(reqRecords): wflow = MSRuleCleanerWflow(req) self._dispatchWflow(wflow) msg = "\n----------------------------------------------------------" msg += "\nMSRuleCleanerWflow: %s" msg += "\n----------------------------------------------------------" self.logger.debug(msg, pformat(wflow)) totalNumRequests += 1 if self._checkClean(wflow): cleanNumRequests += 1 # Report the counters: for pline in self.cleanuplines: msg = "Workflows cleaned by pipeline: %s: %d" self.logger.info(msg, pline.name, self.wfCounters['cleaned'][pline.name]) normalArchivedNumRequests = self.wfCounters['archived']['normalArchived'] forceArchivedNumRequests = self.wfCounters['archived']['forceArchived'] self.logger.info("Workflows normally archived: %d", self.wfCounters['archived']['normalArchived']) self.logger.info("Workflows force archived: %d", self.wfCounters['archived']['forceArchived']) return totalNumRequests, cleanNumRequests, normalArchivedNumRequests, forceArchivedNumRequests def _dispatchWflow(self, wflow): """ A function intended to dispatch a workflow (e.g. based on its status) through one or more functional pipelines, in case the order in which we execute them requires some more complicated logic rather than a plain sequential run. """ self.logger.debug("Dispatching workflow: %s", wflow['RequestName']) # NOTE: The following dispatch logic is subject to change at any time # Resolve: # NOTE: First resolve any preliminary flags that will be needed further # in the logic of the _dispatcher() itself if wflow['RequestStatus'] == 'announced': self.getMSOutputTransferInfo(wflow) # Clean: # Do not clean any Resubmission, but still let them be archived if wflow['RequestType'] == 'Resubmission': wflow['ForceArchive'] = True msg = "Skipping cleanup step for workflow: %s - RequestType is %s." msg += " Will try to archive it directly." self.logger.info(msg, wflow['RequestName'], wflow['RequestType']) elif wflow['RequestStatus'] in ['rejected', 'aborted-completed']: # NOTE: We do not check the ParentageResolved flag for these # workflows, but we do need to clean output data placement # rules from the agents for them for pline in self.agentlines: try: pline.run(wflow) except Exception as ex: msg = "%s: General error from pipeline. Workflow: %s. Error: \n%s. " msg += "\nWill retry again in the next cycle." self.logger.exception(msg, pline.name, wflow['RequestName'], str(ex)) continue if wflow['CleanupStatus'][pline.name]: self.wfCounters['cleaned'][pline.name] += 1 elif wflow['RequestStatus'] == 'announced' and not wflow['ParentageResolved']: # NOTE: We skip workflows which do not have the 'ParentageResolved' # flag set, but we still need some proper logging for them. msg = "Skipping workflow: %s - 'ParentageResolved' flag set to false." msg += " Will retry again in the next cycle." self.logger.info(msg, wflow['RequestName']) elif wflow['RequestStatus'] == 'announced' and not wflow['TransferDone']: # NOTE: We skip workflows which have not yet finalised their TransferStatus # in MSOutput, but we still need some proper logging for them. msg = "Skipping workflow: %s - 'TransferStatus' is 'pending' or 'TransferInfo' is missing in MSOutput." msg += " Will retry again in the next cycle."
self.logger.info(msg, wflow['RequestName']) elif wflow['RequestStatus'] == 'announced' and not wflow['TransferTape']: # NOTE: We skip workflows which have not yet finalised their tape transfers. # (i.e. even if a single output which is supposed to be covered # by a tape rule is in any of the following transient states: # {REPLICATING, STUCK, SUSPENDED, WAITING_APPROVAL}.) # We still need some proper logging for them. msg = "Skipping workflow: %s - tape transfers are not yet completed." msg += " Will retry again in the next cycle." self.logger.info(msg, wflow['RequestName']) elif wflow['RequestStatus'] == 'announced': for pline in self.cleanuplines: try: pline.run(wflow) except MSRuleCleanerResolveParentError as ex: msg = "%s: Parentage Resolve Error: %s. " msg += "Will retry again in the next cycle." self.logger.error(msg, pline.name, str(ex)) continue except Exception as ex: msg = "%s: General error from pipeline. Workflow: %s. Error: \n%s. " msg += "\nWill retry again in the next cycle." self.logger.exception(msg, pline.name, wflow['RequestName'], str(ex)) continue if wflow['CleanupStatus'][pline.name]: self.wfCounters['cleaned'][pline.name] += 1 else: # We shouldn't be here: msg = "Skipping workflow: %s - " msg += "Does not fall under any of the defined categories." self.logger.error(msg, wflow['RequestName']) # Archive: try: self.plineArchive.run(wflow) if wflow['ForceArchive']: self.wfCounters['archived']['forceArchived'] += 1 else: self.wfCounters['archived']['normalArchived'] += 1 except MSRuleCleanerArchivalSkip as ex: msg = "%s: Proper conditions not met: %s. " msg += "Skipping archival in the current cycle." self.logger.info(msg, wflow['PlineMarkers'][-1], str(ex)) except MSRuleCleanerArchivalError as ex: msg = "%s: Archival Error: %s. " msg += "Will retry again in the next cycle." self.logger.error(msg, wflow['PlineMarkers'][-1], str(ex)) except Exception as ex: msg = "%s: General error from pipeline. Workflow: %s. Error: \n%s. " msg += "\nWill retry again in the next cycle." self.logger.exception(msg, wflow['PlineMarkers'][-1], wflow['RequestName'], str(ex)) def setPlineMarker(self, wflow, pName): """ A function intended to mark which pipeline is currently working on the workflow. It is supposed to always be called as the first function in the pipeline. :param wflow: A MSRuleCleaner workflow representation :param pName: The name of the functional pipeline :return: The workflow object """ # NOTE: The current functional pipeline MUST always be appended at the # end of the 'PlineMarkers' list # First get rid of the default: if not wflow['PlineMarkers']: wflow['PlineMarkers'] = [] # Then push our current value into the markers list: wflow['PlineMarkers'].append(pName) # Populate the list of flags to be used later: if pName not in wflow['RulesToClean']: if pName in self.cleanupPipeNames: wflow['RulesToClean'][pName] = [] if pName not in wflow['CleanupStatus']: if pName in self.cleanupPipeNames: wflow['CleanupStatus'][pName] = False return wflow def _checkClean(self, wflow): """ An auxiliary function used to only check the temporary cleanup status.
It takes the pipelines registered in 'PlineMarkers' that have already worked on the workflow as a mask, applies this mask over the set of flags in the 'CleanupStatus' field and then reduces the result to a single bool value """ # NOTE: This is one of the few functions taking a workflow as an argument # but returning a bool, since it is an auxiliary function and is not # supposed to be called as a standalone function in a pipeline. # NOTE: `all([]) == True`, ergo all the 'rejected' and 'aborted-completed' workflows # are also counted as properly cleaned and can trigger archival later # Build a list of bool flags based on the mask of PlineMarkers cleanFlagsList = [wflow['CleanupStatus'][key] for key in wflow['PlineMarkers'] if key in wflow['CleanupStatus']] # If no pipeline has worked on the workflow yet, set the clean status to false if not wflow['PlineMarkers']: cleanStatus = False # If we have a mask longer than the list of flags avoid false positives # because of the behavior explained above - `all([]) == True` elif not cleanFlagsList: cleanStatus = False # Figure out the final value else: cleanStatus = all(cleanFlagsList) return cleanStatus def setClean(self, wflow): """ A function to set the 'IsClean' flag based on the status from all the pipelines which have worked on the workflow (and have put their markers in the 'PlineMarkers' list) :param wflow: A MSRuleCleaner workflow representation :return: The workflow object """ wflow['IsClean'] = self._checkClean(wflow) return wflow def _checkLogDBClean(self, wflow): """ An auxiliary function used to only check the LogDB cleanup status. It makes a query to LogDB in order to verify there are no records for the current workflow :param wflow: A MSRuleCleaner workflow representation :return: True if no records were found in LogDB about wflow """ cleanStatus = False logDBRecords = self.logDB.get(wflow['RequestName']) self.logger.debug("logDBRecords: %s", pformat(logDBRecords)) if not logDBRecords: cleanStatus = True return cleanStatus def setLogDBClean(self, wflow): """ A function to set the 'IsLogDBClean' flag based on the presence of any records in LogDB for the current workflow. :param wflow: A MSRuleCleaner workflow representation :return: The workflow object """ wflow['IsLogDBClean'] = self._checkLogDBClean(wflow) if not wflow['IsLogDBClean'] and wflow['IsArchivalDelayExpired']: wflow['IsLogDBClean'] = self._cleanLogDB(wflow) return wflow def _cleanLogDB(self, wflow): """ A function to be used for cleaning all the records related to a workflow in logDB.
:param wflow: A MSRuleCleaner workflow representation :return: True if NO errors were encountered while deleting records from LogDB """ cleanStatus = False try: if self.msConfig['enableRealMode']: self.logger.info("Deleting %s records from LogDB WMStats...", wflow['RequestName']) res = self.logDB.delete(wflow['RequestName'], agent=False) if res == 'delete-error': msg = "Failed to delete logDB docs for wflow: %s" % wflow['RequestName'] raise MSRuleCleanerArchivalError(msg) cleanStatus = True else: self.logger.info("DRY-RUN: NOT Deleting %s records from LogDB WMStats...", wflow['RequestName']) except Exception as ex: msg = "General Exception while cleaning LogDB records for wflow: %s : %s" self.logger.exception(msg, wflow['RequestName'], str(ex)) return cleanStatus def findTargetStatus(self, wflow): """ Find the proper targeted archival status :param wflow: A MSRuleCleaner workflow representation :return: The workflow object """ # Check the available status transitions before we decide the final status targetStatusList = RequestStatus.REQUEST_STATE_TRANSITION.get(wflow['RequestStatus'], []) for status in targetStatusList: if self.targetStatusRegex.match(status): wflow['TargetStatus'] = status self.logger.debug("TargetStatus: %s", wflow['TargetStatus']) return wflow def _checkArchDelayExpired(self, wflow): """ A function to check the Archival Expiration Delay based on the information returned by WMStatsServer regarding the time of the last request status transition (a small worked example of this check follows the MSRuleCleaner code below) :param wflow: MSRuleCleaner workflow representation :return: True if the archival delay has expired """ archDelayExpired = False currentTime = int(time.time()) threshold = self.msConfig['archiveDelayHours'] * 3600 try: lastTransitionTime = wflow['RequestTransition'][-1]['UpdateTime'] if lastTransitionTime and (currentTime - lastTransitionTime) > threshold: archDelayExpired = True except KeyError: self.logger.debug("Could not find status transition history for %s", wflow['RequestName']) return archDelayExpired def setArchivalDelayExpired(self, wflow): """ A function to set the 'IsArchivalDelayExpired' flag """ wflow['IsArchivalDelayExpired'] = self._checkArchDelayExpired(wflow) return wflow def archive(self, wflow): """ Move the workflow to the proper archived status after checking the full cleanup status :param wflow: A MSRuleCleaner workflow representation :return: The workflow object """ # Make all the needed checks before trying to archive if not (wflow['IsClean'] or wflow['ForceArchive']): msg = "Not properly cleaned workflow: %s" % wflow['RequestName'] raise MSRuleCleanerArchivalSkip(msg) if not wflow['TargetStatus']: msg = "Could not determine which archival status to target for workflow: %s" % wflow['RequestName'] raise MSRuleCleanerArchivalError(msg) if not wflow['IsLogDBClean']: msg = "LogDB records have not been cleaned for workflow: %s" % wflow['RequestName'] raise MSRuleCleanerArchivalSkip(msg) if not wflow['IsArchivalDelayExpired']: msg = "Archival delay period has not yet expired for workflow: %s." % wflow['RequestName'] raise MSRuleCleanerArchivalSkip(msg) if not self.msConfig['enableRealMode']: msg = "Real Run Mode not enabled."
raise MSRuleCleanerArchivalSkip(msg) # Proceed with the actual archival: try: self.reqmgr2.updateRequestStatus(wflow['RequestName'], wflow['TargetStatus']) msg = "Successful status transition to: %s for workflow: %s" self.logger.info(msg, wflow['TargetStatus'], wflow['RequestName']) except Exception as ex: msg = "General Exception while trying status transition to: %s " % wflow['TargetStatus'] msg += "for workflow: %s : %s" % (wflow['RequestName'], str(ex)) raise MSRuleCleanerArchivalError(msg) return wflow def getMSOutputTransferInfo(self, wflow): """ Fetches the transfer information from the MSOutput REST interface for the given workflow. :param wflow: A MSRuleCleaner workflow representation :return: The workflow object """ headers = {'Accept': 'application/json'} params = {} url = '%s/data/info?request=%s' % (self.msConfig['msOutputUrl'], wflow['RequestName']) transferInfo = None try: res = self.curlMgr.getdata(url, params=params, headers=headers, ckey=ckey(), cert=cert()) data = json.loads(res)['result'][0] transferInfo = data['transferDoc'] except Exception as ex: msg = "General exception while fetching TransferInfo from MSOutput for %s. " msg += "Error: %s" self.logger.exception(msg, wflow['RequestName'], str(ex)) # Set Transfer status - information fetched from MSOutput only if transferInfo is not None and transferInfo['TransferStatus'] == 'done': wflow['TransferDone'] = True # Set Tape rules status - information fetched from Rucio (tape rule ids from MSOutput) if transferInfo is not None and transferInfo['OutputMap']: tapeRulesStatusList = [] # For setting 'TransferTape' = True we require either that no tape rules have been # created for the workflow or that all existing tape rules are in status 'OK', # so every empty TapeRuleID is considered completed. for mapRecord in transferInfo['OutputMap']: if not mapRecord['TapeRuleID']: continue rucioRule = self.rucio.getRule(mapRecord['TapeRuleID']) if not rucioRule: tapeRulesStatusList.append(False) msg = "Tape rule: %s not found for workflow: %s " msg += "Possible server-side error." self.logger.error(msg, mapRecord['TapeRuleID'], wflow['RequestName']) continue if rucioRule['state'] == 'OK': tapeRulesStatusList.append(True) msg = "Tape rule: %s in final state: %s for workflow: %s" self.logger.info(msg, mapRecord['TapeRuleID'], rucioRule['state'], wflow['RequestName']) else: tapeRulesStatusList.append(False) msg = "Tape rule: %s in non-final state: %s for workflow: %s" self.logger.info(msg, mapRecord['TapeRuleID'], rucioRule['state'], wflow['RequestName']) if all(tapeRulesStatusList): wflow['TransferTape'] = True return wflow def setParentDatasets(self, wflow): """ Used to resolve parent datasets for a workflow.
:param wflow: A MSRuleCleaner workflow representation :return: The workflow object """ if wflow['InputDataset'] and wflow['IncludeParents']: childDataset = wflow['InputDataset'] parentDataset = findParent([childDataset], self.msConfig['dbsUrl']) # NOTE: If findParent() returned None then the DBS service failed to # resolve the request (it is considered an ERROR outside WMCore) if parentDataset.get(childDataset, None) is None: msg = "Failed to resolve parent dataset for: %s in workflow: %s" % (childDataset, wflow['RequestName']) raise MSRuleCleanerResolveParentError(msg) elif parentDataset: wflow['ParentDataset'] = [parentDataset[childDataset]] msg = "Found parent %s for input dataset %s in workflow: %s " self.logger.info(msg, parentDataset, wflow['InputDataset'], wflow['RequestName']) else: msg = "Could not find parent for input dataset: %s in workflow: %s" self.logger.error(msg, wflow['InputDataset'], wflow['RequestName']) return wflow def getRucioRules(self, wflow, gran, rucioAcct): """ Queries Rucio and builds the relevant list of container- or block-level rules for the given workflow :param wflow: A MSRuleCleaner workflow representation :param gran: Data granularity to search for Rucio rules. Possible values: 'block' or 'container' :return: The workflow object """ currPline = wflow['PlineMarkers'][-1] # Map each Rucio account to the list of workflow fields whose containers it may hold rules for, # and set the checkGlobalLocks flag. mapRuleType = {self.msConfig['rucioWmaAccount']: ["OutputDatasets"], self.msConfig['rucioMStrAccount']: ["InputDataset", "MCPileup", "DataPileup", "ParentDataset"]} if rucioAcct == self.msConfig['rucioMStrAccount']: checkGlobalLocks = True else: checkGlobalLocks = False # Find all the data placement rules created by the components: for dataType in mapRuleType[rucioAcct]: dataList = wflow[dataType] if isinstance(wflow[dataType], list) else [wflow[dataType]] for dataCont in dataList: if dataCont is None: continue self.logger.debug("getRucioRules: dataCont: %s", pformat(dataCont)) if checkGlobalLocks and dataCont in self.globalLocks: msg = "Found dataset: %s in GlobalLocks. NOT considering it for filling the " msg += "RulesToClean list for both container- and block-level rules for workflow: %s!" self.logger.info(msg, dataCont, wflow['RequestName']) continue if gran == 'container': for rule in self.rucio.listDataRules(dataCont, account=rucioAcct): wflow['RulesToClean'][currPline].append(rule['id']) msg = "Found container-level rule %s to be deleted for container %s" self.logger.info(msg, rule['id'], dataCont) elif gran == 'block': try: blocks = self.rucio.getBlocksInContainer(dataCont) for block in blocks: for rule in self.rucio.listDataRules(block, account=rucioAcct): wflow['RulesToClean'][currPline].append(rule['id']) msg = "Found block-level rule %s to be deleted for container %s" self.logger.info(msg, rule['id'], dataCont) except WMRucioDIDNotFoundException: msg = "Container: %s not found in Rucio for workflow: %s." self.logger.info(msg, dataCont, wflow['RequestName']) return wflow def cleanRucioRules(self, wflow): """ Cleans all the rules present in the field 'RulesToClean' in the MSRuleCleaner workflow representation and fills in the relevant cleanup status. :param wflow: A MSRuleCleaner workflow representation :return: The workflow object """ # NOTE: The function should be called independently and sequentially from # the Input and the respective BlockLevel pipelines.
# NOTE: The current functional pipeline is always the last one in the PlineMarkers list currPline = wflow['PlineMarkers'][-1] delResults = [] if self.msConfig['enableRealMode']: for rule in wflow['RulesToClean'][currPline]: self.logger.info("%s: Deleting ruleId: %s ", currPline, rule) delResult = self.rucio.deleteRule(rule) delResults.append(delResult) if not delResult: self.logger.warning("%s: Failed to delete ruleId: %s ", currPline, rule) else: for rule in wflow['RulesToClean'][currPline]: delResults.append(True) self.logger.info("%s: DRY-RUN: Is about to delete ruleId: %s ", currPline, rule) # Set the cleanup flag: wflow['CleanupStatus'][currPline] = all(delResults) return wflow def getRequestRecords(self, reqStatus): """ Queries ReqMgr2 for requests in a given status. :param reqStatus: The status for the requests to be fetched from ReqMgr2 :return requests: A dictionary with all the workflows in the given status """ self.logger.info("Fetching requests in status: %s", reqStatus) result = self.reqmgr2.getRequestByStatus([reqStatus], detail=True) if not result: requests = {} else: requests = result[0] self.logger.info(' retrieved %s requests in status: %s', len(requests), reqStatus) return requests
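Two details of the checks above are easy to miss: all([]) evaluates to True, which is why _checkClean guards against an empty flag list, and the archival delay is a plain wall-clock comparison against the time of the last status transition. A tiny illustrative sketch with made-up values:

import time

# CleanupStatus/PlineMarkers as they might look after two agent pipelines ran
cleanupStatus = {'plineAgentCont': True, 'plineAgentBlock': True}
plineMarkers = ['plineAgentCont', 'plineAgentBlock']

flags = [cleanupStatus[p] for p in plineMarkers if p in cleanupStatus]
# guard against all([]) == True, exactly as _checkClean does
isClean = bool(plineMarkers) and bool(flags) and all(flags)

# archival delay: compare the last status transition against a threshold in seconds
archiveDelayHours = 8                                  # as in msConfig['archiveDelayHours']
lastTransitionTime = int(time.time()) - 10 * 3600      # pretend the transition was 10 hours ago
delayExpired = (int(time.time()) - lastTransitionTime) > archiveDelayHours * 3600

print(isClean, delayExpired)   # -> True True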
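Putting the class together, a minimal driver for MSRuleCleaner might look like the sketch below. In production execute() is called by MSManager threads; the URLs and account values here are placeholders and MSCore may require additional keys (e.g. the ReqMgr2 and Rucio endpoints) not shown in the chunk above. Only the configuration keys themselves are taken from the code.

# a minimal, dry-run configuration sketch (placeholder values)
msConfig = {
    'enableRealMode': False,                 # stay in DryRunMode
    'archiveDelayHours': 8,
    'rucioWmaAccount': 'wma_test',
    'rucioMStrAccount': 'wmcore_transferor',
    'logDBUrl': 'https://cmsweb.example/couchdb/wmstats_logdb',    # placeholder URL
    'logDBReporter': 'ms-rulecleaner',                             # placeholder reporter name
    'wmstatsUrl': 'https://cmsweb.example/wmstatsserver',          # placeholder URL
    'msOutputUrl': 'https://cmsweb.example/ms-output',             # placeholder URL
    'dbsUrl': 'https://cmsweb.example/dbs/prod/global/DBSReader',  # placeholder URL
}

msRuleCleaner = MSRuleCleaner(msConfig)
# statuses explicitly handled by the dispatch logic above
summary = msRuleCleaner.execute(['announced', 'rejected', 'aborted-completed'])
# the summary layout comes from RULECLEANER_REPORT / updateReportDict; print it whole
print(summary)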
def __init__(self, msConfig, logger=None): """ Runs the basic setup and initialization for the MSRuleCleaner module :param msConfig: micro service configuration """ super(MSRuleCleaner, self).__init__(msConfig, logger=logger) self.msConfig.setdefault("verbose", True) self.msConfig.setdefault("interval", 60) self.msConfig.setdefault("services", ['ruleCleaner']) self.msConfig.setdefault("rucioWmaAccount", "wma_test") self.msConfig.setdefault("rucioMStrAccount", "wmcore_transferor") self.msConfig.setdefault('enableRealMode', False) self.mode = "RealMode" if self.msConfig[ 'enableRealMode'] else "DryRunMode" self.emailAlert = EmailAlert(self.msConfig) self.curlMgr = RequestHandler() # Building all the Pipelines: pName = 'plineMSTrCont' self.plineMSTrCont = Pipeline(name=pName, funcLine=[ Functor(self.setPlineMarker, pName), Functor(self.cleanRucioRules) ]) pName = 'plineMSTrBlock' self.plineMSTrBlock = Pipeline(name=pName, funcLine=[ Functor(self.setPlineMarker, pName), Functor(self.cleanRucioRules) ]) pName = 'plineAgentCont' self.plineAgentCont = Pipeline( name=pName, funcLine=[ Functor(self.setPlineMarker, pName), Functor(self.getRucioRules, 'container', self.msConfig['rucioWmaAccount']), Functor(self.cleanRucioRules) ]) pName = 'plineAgentBlock' self.plineAgentBlock = Pipeline( name=pName, funcLine=[ Functor(self.setPlineMarker, pName), Functor(self.getRucioRules, 'block', self.msConfig['rucioWmaAccount']), Functor(self.cleanRucioRules) ]) pName = 'plineArchive' self.plineArchive = Pipeline(name=pName, funcLine=[ Functor(self.setPlineMarker, pName), Functor(self.setClean), Functor(self.archive) ]) # Building the different set of plines we will need later: # NOTE: The following are all the functional pipelines which are supposed to include # a cleanup function and report cleanup status in the MSRuleCleanerWflow object self.cleanuplines = [ self.plineMSTrCont, self.plineMSTrBlock, self.plineAgentCont, self.plineAgentBlock ] # Building an auxiliary list of cleanup pipeline names only: self.cleanupPipeNames = [pline.name for pline in self.cleanuplines] # Building lists of pipelines related only to Agents or MStransferror self.agentlines = [self.plineAgentCont, self.plineAgentBlock] self.mstrlines = [self.plineMSTrCont, self.plineMSTrBlock] # Initialization of the 'cleaned' and 'archived' counters: self.wfCounters = {'cleaned': {}, 'archived': 0}
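Both __init__ versions rely on the same Pipeline/Functor pattern: each Functor captures a bound method plus extra arguments, and Pipeline.run threads the workflow object through the funcLine in order. A minimal re-implementation conveying the semantics assumed here (the real WMCore classes may differ in detail):

class Functor(object):
    """Bind a callable plus extra arguments; the workflow is supplied at run time."""
    def __init__(self, func, *args, **kwargs):
        self.func = func
        self.args = args
        self.kwargs = kwargs

    def __call__(self, wflow):
        return self.func(wflow, *self.args, **self.kwargs)


class Pipeline(object):
    """Run the Functors in funcLine sequentially, feeding each one the previous result."""
    def __init__(self, name, funcLine):
        self.name = name
        self.funcLine = funcLine

    def run(self, wflow):
        for step in self.funcLine:
            wflow = step(wflow)
        return wflow

# e.g. Pipeline(name='plineAgentBlock',
#               funcLine=[Functor(setPlineMarker, 'plineAgentBlock'), ...]).run(wflow)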
def command(self, jobs, jobs_lfn, jobs_pfn, jobs_report): """ For each job the worker has to complete: Delete files that have failed previously Create a temporary copyjob file Submit the copyjob to the appropriate FTS server Parse the output of the FTS transfer and return complete and failed files for recording """ # Output: {"userProxyPath":"/path/to/proxy","LFNs":["lfn1","lfn2","lfn3"],"PFNs":["pfn1","pfn2","pfn3"],"FTSJobid":'id-of-fts-job', "username": '******'} # Loop through all the jobs for the links we have failure_reasons = [] for link, copyjob in jobs.items(): submission_error = False status_error = False fts_job = {} # Validate copyjob file before doing anything self.logger.debug("Copyjob validation result: %s" % self.validate_copyjob(copyjob)) if not self.validate_copyjob(copyjob): continue rest_copyjob = { "params":{ "bring_online": None, "verify_checksum": False, "copy_pin_lifetime": -1, "max_time_in_queue": self.config.max_h_in_queue, "job_metadata":{"issuer": "ASO"}, "spacetoken": None, "source_spacetoken": None, "fail_nearline": False, "overwrite": True, "gridftp": None }, "files":[] } pairs = [] for SrcDest in copyjob: tempDict = {"sources": [], "metadata": None, "destinations": []} tempDict["sources"].append(SrcDest.split(" ")[0]) tempDict["destinations"].append(SrcDest.split(" ")[1]) rest_copyjob["files"].append(tempDict) self.logger.debug("Submitting this REST copyjob %s" % rest_copyjob) url = self.fts_server_for_transfer + '/jobs' self.logger.debug("Running FTS submission command") self.logger.debug("FTS server: %s" % self.fts_server_for_transfer) self.logger.debug("link: %s -> %s" % link) headers = {"Content-Type": "application/json"} buf = StringIO.StringIO() try: connection = RequestHandler(config={'timeout': 300, 'connecttimeout': 300}) except Exception as ex: msg = str(ex) msg += str(traceback.format_exc()) self.logger.debug(msg) try: response, datares = connection.request(url, rest_copyjob, headers, verb='POST', doseq=True, ckey=self.user_proxy, \ cert=self.user_proxy, capath='/etc/grid-security/certificates', \ cainfo=self.user_proxy, verbose=True) self.logger.debug("Submission done") self.logger.debug('Submission header status: %s' % response.status) self.logger.debug('Submission header reason: %s' % response.reason) self.logger.debug('Submission result %s' % datares) except Exception as ex: msg = "Error submitting to FTS: %s " % url msg += str(ex) msg += str(traceback.format_exc()) self.logger.debug(msg) failure_reasons.append(msg) submission_error = True buf.close() if not submission_error: res = {} try: res = json.loads(datares) except Exception as ex: msg = "Couldn't load submission acknowledgment from FTS" msg += str(ex) msg += str(traceback.format_exc()) self.logger.debug(msg) submission_error = True failure_reasons.append(msg) if 'job_id' in res: fileId_list = [] files_res = [] files_ = {} job_id = res['job_id'] file_url = self.fts_server_for_transfer + '/jobs/' + job_id +'/files' self.logger.debug("Submitting to %s" % file_url) file_buf = StringIO.StringIO() try: response, files_ = connection.request(file_url, {}, headers, doseq=True, ckey=self.user_proxy, \ cert=self.user_proxy, capath='/etc/grid-security/certificates', \ cainfo=self.user_proxy, verbose=True) files_res = json.loads(files_) except Exception as ex: msg = "Error contacting FTS to retrieve file: %s " % file_url msg += str(ex) msg += str(traceback.format_exc()) self.logger.debug(msg) submission_error = True failure_reasons.append(msg) self.logger.debug("List files in job %s" % files_) file_buf.close() for
file_in_job in files_res: if 'file_id' in file_in_job: fileId_list.append(file_in_job['file_id']) else: msg = "Could not load submitted file %s from FTS" % file_url self.logger.debug(msg) submission_error = True failure_reasons.append(msg) self.logger.debug("File id list %s" % fileId_list) if submission_error: self.logger.debug("Submission failed") self.logger.info("Mark failed %s files" % len(jobs_lfn[link])) self.logger.debug("Mark failed %s files" % jobs_lfn[link]) failed_files = self.mark_failed(jobs_lfn[link], force_fail=False, submission_error=True, failure_reasons=failure_reasons) self.logger.info("Marked failed %s" % len(failed_files)) continue fts_job['userProxyPath'] = self.user_proxy fts_job['LFNs'] = jobs_lfn[link] fts_job['PFNs'] = jobs_pfn[link] fts_job['FTSJobid'] = job_id fts_job['files_id'] = fileId_list fts_job['username'] = self.user self.logger.debug("Creating json file %s in %s" % (fts_job, self.dropbox_dir)) ftsjob_file = open('%s/Monitor.%s.json' % (self.dropbox_dir, fts_job['FTSJobid']), 'w') jsondata = json.dumps(fts_job) ftsjob_file.write(jsondata) ftsjob_file.close() self.logger.debug("%s ready." % fts_job) # Prepare Dashboard report for lfn in fts_job['LFNs']: lfn_report = {} lfn_report['FTSJobid'] = fts_job['FTSJobid'] index = fts_job['LFNs'].index(lfn) lfn_report['PFN'] = fts_job['PFNs'][index] lfn_report['FTSFileid'] = fts_job['files_id'][index] lfn_report['Workflow'] = jobs_report[link][index][2] lfn_report['JobVersion'] = jobs_report[link][index][1] job_id = '%d_https://glidein.cern.ch/%d/%s_%s' % (int(jobs_report[link][index][0]), int(jobs_report[link][index][0]), lfn_report['Workflow'].replace("_", ":"), lfn_report['JobVersion']) lfn_report['JobId'] = job_id lfn_report['URL'] = self.fts_server_for_transfer self.logger.debug("Creating json file %s in %s for FTS3 Dashboard" % (lfn_report, self.dropbox_dir)) dash_job_file = open('/tmp/Dashboard.%s.json' % getHashLfn(lfn_report['PFN']), 'w') jsondata = json.dumps(lfn_report) dash_job_file.write(jsondata) dash_job_file.close() self.logger.debug("%s ready for FTS Dashboard report." % lfn_report) return
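The REST payload built in command() turns every 'source destination' pair of the copyjob into a single-source/single-destination entry under 'files'. A condensed sketch of just that transformation, with made-up PFNs:

copyjob = [
    "gsiftp://source.example/store/user/file1.root gsiftp://dest.example/store/user/file1.root",
    "gsiftp://source.example/store/user/file2.root gsiftp://dest.example/store/user/file2.root",
]

rest_copyjob = {"params": {"overwrite": True, "job_metadata": {"issuer": "ASO"}}, "files": []}
for srcDest in copyjob:
    source, destination = srcDest.split(" ")[0], srcDest.split(" ")[1]
    # one source and one destination per file entry, as in command() above
    rest_copyjob["files"].append({"sources": [source],
                                  "destinations": [destination],
                                  "metadata": None})

print(len(rest_copyjob["files"]))   # -> 2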