def datasets_dbs(self):
    """
    Retrieve a list of DBS datasets
    """
    params = {'dataset_access_type': 'VALID'}
    encoded_data = urllib.urlencode(params, doseq=True)
    url = self.dbs_url + '/datasets?' + encoded_data
    req = urllib2.Request(url)
    ckey, cert = get_key_cert()
    handler = HTTPSClientAuthHandler(ckey, cert)
    opener = urllib2.build_opener(handler)
    urllib2.install_opener(opener)
    stream = urllib2.urlopen(req)
    gen = json.load(stream)
    for row in gen:
        dataset = row['dataset']
        rec = {'dataset': dataset}
        if self.write_hash:
            storage_query = {"fields": ["dataset"],
                             "spec": [{"key": "dataset.name",
                                       "value": "\"%s\"" % dataset}],
                             "instance": self.dbcoll}
            rec.update({'qhash': genkey(storage_query)})
        yield rec
    stream.close()
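# A minimal usage sketch for datasets_dbs above. `DatasetFeeder` and its
# constructor arguments are hypothetical stand-ins for whatever class hosts
# this method (it must provide dbs_url, write_hash and dbcoll attributes).
feeder = DatasetFeeder(dbs_url='https://cmsweb.cern.ch/dbs/prod/global/DBSReader',
                       write_hash=True, dbcoll='prod/global')
for rec in feeder.datasets_dbs():
    print(rec) # e.g. {'dataset': '/a/b/c', 'qhash': '...'}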
def fetch_values(self):
    """
    fetch the data from providers and select the final values
    with jsonpath rules
    """
    # use grid-proxy for authentication
    ckey, cert = get_key_cert()
    handler = HTTPSClientAuthHandler(ckey, cert)
    opener = urllib2.build_opener(handler)
    urllib2.install_opener(opener)
    # request list of possible values
    params = {}
    encoded_data = urllib.urlencode(params, doseq=True)
    service = self.cfg
    url = service['url'] + encoded_data
    print(str(url))
    req = urllib2.Request(url)
    # ensure we get json (sitedb is messed up and randomly returns xml)
    if service['jsonpath_selector']:
        req.add_header('Accept', 'application/json')
    #print req.get_full_url()
    stream = urllib2.urlopen(req)
    if service['jsonpath_selector']:
        response = json.load(stream)
        jsonpath_expr = parse(service['jsonpath_selector'])
        results = jsonpath_expr.find(response)
        stream.close()
        return ({'value': v.value} for v in results)
    return []
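# A minimal sketch of the jsonpath selection step used in fetch_values,
# assuming the `parse` above comes from the jsonpath_rw package (an
# assumption; the import is not shown in the original snippet). The
# sample response and selector are illustrative.
from jsonpath_rw import parse

response = {'result': [{'name': 'T1_US_FNAL'}, {'name': 'T2_CH_CERN'}]}
jsonpath_expr = parse('result[*].name')
values = [{'value': match.value} for match in jsonpath_expr.find(response)]
# values == [{'value': 'T1_US_FNAL'}, {'value': 'T2_CH_CERN'}]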
def __init__(self, name, config):
    self.name = name
    try:
        self.verbose = config['verbose']
        title = 'DASAbstractService_%s' % self.name
        self.logger = PrintManager(title, self.verbose)
        self.dasmapping = config['dasmapping']
        self.analytics = config['dasanalytics']
        self.write2cache = config.get('write_cache', True)
        self.multitask = config['das'].get('multitask', True)
        self.error_expire = config['das'].get('error_expire', 300)
        if 'dbs' in config:
            self.dbs_global = config['dbs'].get('dbs_global_instance', None)
        else:
            self.dbs_global = None
        dburi = config['mongodb']['dburi']
        engine = config.get('engine', None)
        self.gfs = db_gridfs(dburi)
    except Exception as exc:
        print_exc(exc)
        raise Exception('fail to parse DAS config')

    # read key/cert info
    try:
        self.ckey, self.cert = get_key_cert()
    except Exception as exc:
        print_exc(exc)
        self.ckey = None
        self.cert = None

    if self.multitask:
        nworkers = config['das'].get('api_workers', 3)
        thr_weights = config['das'].get('thread_weights', [])
        for system_weight in thr_weights:
            system, weight = system_weight.split(':')
            if system == self.name:
                nworkers *= int(weight)
        if engine:
            thr_name = 'DASAbstractService:%s:PluginTaskManager' % self.name
            self.taskmgr = PluginTaskManager(\
                    engine, nworkers=nworkers, name=thr_name)
            self.taskmgr.subscribe()
        else:
            thr_name = 'DASAbstractService:%s:TaskManager' % self.name
            self.taskmgr = TaskManager(nworkers=nworkers, name=thr_name)
    else:
        self.taskmgr = None

    self.map = {}         # to be defined by data-service implementation
    self._keys = None     # to be defined at run-time in self.keys
    self._params = None   # to be defined at run-time in self.parameters
    self._notations = {}  # to be defined at run-time in self.notations

    self.logger.info('initialized')
    # define internal cache manager to put 'raw' results into cache
    if 'rawcache' in config and config['rawcache']:
        self.localcache = config['rawcache']
    else:
        msg = 'Undefined rawcache, please check your configuration'
        raise Exception(msg)
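# Sketch of the thread_weights convention parsed above: each entry is a
# "system:weight" string, and a matching service name multiplies its worker
# pool. The config values below are illustrative, not from the original.
config = {'das': {'api_workers': 3, 'thread_weights': ['dbs:2', 'phedex:4']}}
nworkers = config['das'].get('api_workers', 3)
for system_weight in config['das'].get('thread_weights', []):
    system, weight = system_weight.split(':')
    if system == 'dbs': # pretend this service instance is named 'dbs'
        nworkers *= int(weight)
# nworkers is now 6: the base 3 api_workers scaled by the dbs weight of 2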
def datasets_dbs3(self):
    """
    Retrieve a list of DBS datasets (DBS3)
    """
    params = {"dataset_access_type": "VALID"}
    encoded_data = urllib.urlencode(params, doseq=True)
    url = self.dbs_url + "/datasets?" + encoded_data
    req = urllib2.Request(url)
    ckey, cert = get_key_cert()
    handler = HTTPSClientAuthHandler(ckey, cert)
    opener = urllib2.build_opener(handler)
    urllib2.install_opener(opener)
    stream = urllib2.urlopen(req)
    gen = json.load(stream)
    for row in gen:
        dataset = row["dataset"]
        rec = {"dataset": dataset}
        if self.write_hash:
            storage_query = {
                "fields": ["dataset"],
                "spec": [{"key": "dataset.name",
                          "value": '"%s"' % dataset}],
                "instance": self.dbcoll,
            }
            rec.update({"qhash": genkey(storage_query)})
        yield rec
    stream.close()
def __init__(self, name, config):
    self.name = name
    try:
        self.verbose = config['verbose']
        title = 'DASAbstractService_%s' % self.name
        self.logger = PrintManager(title, self.verbose)
        self.dasmapping = config['dasmapping']
        self.write2cache = config.get('write_cache', True)
        self.multitask = config['das'].get('multitask', True)
        self.error_expire = config['das'].get('error_expire', 300)
        self.dbs_global = None # to be configured at run time
        self.dburi = config['mongodb']['dburi']
        engine = config.get('engine', None)
        self.gfs = db_gridfs(self.dburi)
    except Exception as exc:
        print_exc(exc)
        raise Exception('fail to parse DAS config')

    # read key/cert info
    try:
        self.ckey, self.cert = get_key_cert()
    except Exception as exc:
        print_exc(exc)
        self.ckey = None
        self.cert = None

    if self.multitask:
        nworkers = config['das'].get('api_workers', 3)
        thr_weights = config['das'].get('thread_weights', [])
        for system_weight in thr_weights:
            system, weight = system_weight.split(':')
            if system == self.name:
                nworkers *= int(weight)
#         if engine:
#             thr_name = 'DASAbstractService:%s:PluginTaskManager' % self.name
#             self.taskmgr = PluginTaskManager(\
#                     engine, nworkers=nworkers, name=thr_name)
#             self.taskmgr.subscribe()
#         else:
#             thr_name = 'DASAbstractService:%s:TaskManager' % self.name
#             self.taskmgr = TaskManager(nworkers=nworkers, name=thr_name)
        thr_name = 'DASAbstractService:%s:TaskManager' % self.name
        self.taskmgr = TaskManager(nworkers=nworkers, name=thr_name)
    else:
        self.taskmgr = None

    self.map = {}         # to be defined by data-service implementation
    self._keys = None     # to be defined at run-time in self.keys
    self._params = None   # to be defined at run-time in self.parameters
    self._notations = {}  # to be defined at run-time in self.notations

    self.logger.info('initialized')
    # define internal cache manager to put 'raw' results into cache
    if 'rawcache' in config and config['rawcache']:
        self.localcache = config['rawcache']
    else:
        msg = 'Undefined rawcache, please check your configuration'
        raise Exception(msg)
def runsummary(run, debug):
    """Test RunSummary for given run number"""
    pat = '<runNumber>%s</runNumber>' % run
    key, cert = get_key_cert()
    url = 'https://cmswbm.web.cern.ch/cmswbm/cmsdb/servlet/RunSummary?'
    url += 'RUN=%s&DB=cms_omds_lb&FORMAT=XML' % run
    data = get_data(url, key, cert, debug)
    for line in data.read().split('\n'):
        if line == pat:
            return pat
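# Usage sketch: runsummary returns the matched <runNumber> pattern when the
# run exists in the RunSummary XML, or None (implicitly) otherwise. The run
# number below is a hypothetical example value.
if runsummary(160915, debug=0):
    print('run found in RunSummary')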
def runsummary(run, debug):
    """Test RunSummary for given run number"""
    pat = "<runNumber>%s</runNumber>" % run
    key, cert = get_key_cert()
    url = "https://cmswbm.web.cern.ch/cmswbm/cmsdb/servlet/RunSummary?"
    url += "RUN=%s&DB=cms_omds_lb&FORMAT=XML" % run
    data = get_data(url, key, cert, debug)
    for line in data.read().split("\n"):
        if line == pat:
            return pat
def __init__(self, name, config):
    self.name = name
    try:
        self.verbose = config["verbose"]
        title = "DASAbstractService_%s" % self.name
        self.logger = PrintManager(title, self.verbose)
        self.dasmapping = config["dasmapping"]
        self.write2cache = config.get("write_cache", True)
        self.multitask = config["das"].get("multitask", True)
        self.error_expire = config["das"].get("error_expire", 300)
        self.dbs_global = None # to be configured at run time
        self.dburi = config["mongodb"]["dburi"]
        engine = config.get("engine", None)
        self.gfs = db_gridfs(self.dburi)
    except Exception as exc:
        print_exc(exc)
        raise Exception("fail to parse DAS config")

    # read key/cert info
    try:
        self.ckey, self.cert = get_key_cert()
    except Exception as exc:
        print_exc(exc)
        self.ckey = None
        self.cert = None

    if self.multitask:
        nworkers = config["das"].get("api_workers", 3)
        thr_weights = config["das"].get("thread_weights", [])
        for system_weight in thr_weights:
            system, weight = system_weight.split(":")
            if system == self.name:
                nworkers *= int(weight)
        if engine:
            thr_name = "DASAbstractService:%s:PluginTaskManager" % self.name
            self.taskmgr = PluginTaskManager(engine, nworkers=nworkers,
                                             name=thr_name)
            self.taskmgr.subscribe()
        else:
            thr_name = "DASAbstractService:%s:TaskManager" % self.name
            self.taskmgr = TaskManager(nworkers=nworkers, name=thr_name)
    else:
        self.taskmgr = None

    self.map = {}         # to be defined by data-service implementation
    self._keys = None     # to be defined at run-time in self.keys
    self._params = None   # to be defined at run-time in self.parameters
    self._notations = {}  # to be defined at run-time in self.notations

    self.logger.info("initialized")
    # define internal cache manager to put 'raw' results into cache
    if "rawcache" in config and config["rawcache"]:
        self.localcache = config["rawcache"]
    else:
        msg = "Undefined rawcache, please check your configuration"
        raise Exception(msg)
def test():
    """Test main function"""
    dbs_url = 'https://cmsweb.cern.ch/dbs/prod/global/DBSReader'
    # NOTE: the assignment below overrides the DBS3 URL above with the
    # DBS2 servlet; reorder or comment out one of them to switch backends
    dbs_url = 'http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet'
    rr_url = 'http://localhost:8081/runregistry'
    ckey, cert = get_key_cert()
    dataset = '/DoubleElectron/Run2012A-13Jul2012-v1/AOD'
    kwds = dict(dbs_url=dbs_url, rr_url=rr_url, ckey=ckey, cert=cert,
                dataset=dataset)
    for row in lumis4dataset(kwds):
        print(row)
def __init__(self, config=None):
    super(LumiService, self).__init__()
    if not config:
        config = {}
    self.dasconfig = das_readconfig()
    self.service_name = config.get('name', 'combined')
    self.service_api = config.get('api', 'combined_lumi4dataset')
    self.uri = self.dasconfig['mongodb']['dburi']
    self.urls = None   # defined at run-time via self.init()
    self.expire = None # defined at run-time via self.init()
    self.ckey, self.cert = get_key_cert()
    self.init()
def datasets_dbs3(self):
    """
    Retrieve a list of DBS datasets (DBS3)
    """
    params = {'dataset_access_type': 'PRODUCTION'}
    encoded_data = urllib.urlencode(params, doseq=True)
    url = self.dbs_url + '/datasets?' + encoded_data
    req = urllib2.Request(url)
    ckey, cert = get_key_cert()
    handler = HTTPSClientAuthHandler(ckey, cert)
    opener = urllib2.build_opener(handler)
    urllib2.install_opener(opener)
    stream = urllib2.urlopen(req)
    gen = json.load(stream)
    for row in gen:
        yield row
    stream.close()
def datasets_dbs3(urls, verbose=0):
    """DBS3 implementation of datasets function"""
    headers = {'Accept': 'application/json;text/json'}
    records = []
    url = urls.get('dbs')
    params = {'detail': 'True', 'dataset_access_type': 'PRODUCTION'}
    ckey, cert = get_key_cert()
    data, _ = getdata(url, params, headers, verbose=verbose,
                      ckey=ckey, cert=cert, doseq=False)
    records = json.load(data)
    data.close()
    data = {}
    size = 10 # size for POST request to Phedex
    for row in records:
        if row['dataset'] not in data:
            data[row['dataset']] = \
                dict(era=row['acquisition_era_name'],
                     tier=row['data_tier_name'])
        if len(data.keys()) > size:
            for rec in dataset_info(urls, data):
                yield rec
            data = {}
    if data:
        for rec in dataset_info(urls, data):
            yield rec
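# The loop above flushes accumulated datasets to dataset_info (which issues
# a POST request to PhEDEx) once more than `size` entries pile up. A generic
# sketch of that batching pattern, factored into a standalone generator:
def batched(items, size=10):
    """Yield lists of accumulated items, flushing once len exceeds size"""
    chunk = []
    for item in items:
        chunk.append(item)
        if len(chunk) > size: # same '>' threshold as the original loop
            yield chunk
            chunk = []
    if chunk: # flush the remainder
        yield chunk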
"""
ReqMgr service
"""
__author__ = "Valentin Kuznetsov"

# system modules
import time

# DAS modules
from DAS.services.abstract_service import DASAbstractService
from DAS.utils.utils import map_validator, get_key_cert, json_parser
from DAS.utils.url_utils import getdata
from DAS.utils.urlfetch_pycurl import getdata as urlfetch_getdata
import DAS.utils.jsonwrapper as json

CKEY, CERT = get_key_cert()

def findReqMgrIds(dataset, base='https://cmsweb.cern.ch', verbose=False):
    """
    Find ReqMgrIds for a given dataset. This is a quite complex procedure
    in CMS. We need to query the ReqMgr data-service cache and find workflow
    ids by output dataset name. The ReqMgr returns either a document with
    ids used by MCM (i.e. ProcConfigCacheID, ConfigCacheID,
    SkimConfigCacheID) or we can take the id of the request which bypasses
    MCM. For references see these discussions:
    https://github.com/dmwm/DAS/issues/4045
    https://hypernews.cern.ch/HyperNews/CMS/get/dmDevelopment/1501/1/1/1/1.html
    """
    params = {'key': '"%s"' % dataset, 'include_docs': 'true'}
    url = "%s/couchdb/reqmgr_workload_cache/_design/ReqMgr/_view/byoutputdataset" \
        % base
    headers = {'Accept': 'application/json;text/json'}
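# A hedged sketch of issuing the CouchDB view query assembled above with
# plain urllib2; the original module likely goes through its own getdata
# helper, so this is an assumption, not the original call. The dataset
# name '/a/b/c' is a placeholder.
import urllib
import urllib2
params = {'key': '"/a/b/c"', 'include_docs': 'true'}
url = 'https://cmsweb.cern.ch/couchdb/reqmgr_workload_cache/' \
      '_design/ReqMgr/_view/byoutputdataset?' + urllib.urlencode(params)
req = urllib2.Request(url, headers={'Accept': 'application/json;text/json'})
# stream = urllib2.urlopen(req) # requires a valid grid proxy/certificate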
""" ReqMgr2 service """ __author__ = "Valentin Kuznetsov" # system modules import json import time # DAS modules from DAS.services.abstract_service import DASAbstractService from DAS.utils.utils import map_validator, get_key_cert, json_parser from DAS.utils.url_utils import getdata from DAS.utils.urlfetch_pycurl import getdata as urlfetch_getdata CKEY, CERT = get_key_cert() def get_ids(url, params, dataset, verbose=False): "Query either ReqMgr2 or WMStats and retrieve request ids" headers = {'Accept': 'application/json;text/json'} expire = 600 # dummy number, we don't need it here ids = [] source, expire = \ getdata(url, params, headers, expire, ckey=CKEY, cert=CERT, verbose=verbose) for row in json_parser(source, None): for rec in row.get('rows', []): doc = rec['doc'] found = 0 if not doc:
def apicall(self, dasquery, url, api, args, dformat, expire):
    """
    Invoke DBS API to execute given query.
    Return results as a list of dict, e.g.
    [{'run': 1, 'dataset': '/a/b/c'}, ...]
    """
    # translate selection keys into ones data-service APIs provides
    cond = dasquery.mongo_query['spec']
    args = dict(self.params)
    for key, value in cond.iteritems():
        if not isinstance(value, dict): # we got equal condition
            if key == 'date':
                if isinstance(value, list) and len(value) != 2:
                    msg = 'RunSummary service requires 2 time stamps. '
                    msg += 'Please use either date last XXh format or '
                    msg += 'date in YYYYMMDD-YYYYMMDD'
                    raise Exception(msg)
                args['TIME_BEGIN'] = convert_datetime(value[0])
                args['TIME_END'] = convert_datetime(value[1])
            else:
                for param in self.dasmapping.das2api(self.name, key):
                    args[param] = value
        elif key == 'run.number' or key == 'run.run_number':
            minrun = None
            maxrun = None
            for oper, val in value.iteritems():
                if oper == '$in':
                    minrun = int(val[0])
                    maxrun = int(val[-1])
                elif oper == '$lt':
                    maxrun = int(val) - 1
                elif oper == '$lte':
                    maxrun = int(val)
                elif oper == '$gt':
                    minrun = int(val) + 1
                elif oper == '$gte':
                    minrun = int(val)
                else:
                    msg = 'RunSummary does not support operator %s' % oper
                    raise Exception(msg)
            args['RUN_BEGIN'] = minrun
            args['RUN_END'] = maxrun
        elif key == 'date' and value.has_key('$in') and \
            len(value['$in']) == 2:
            date1, date2 = value['$in']
            args['TIME_BEGIN'] = convert_datetime(date1)
            args['TIME_END'] = convert_datetime(date2)
        else:
            # we got some operator, e.g. key: {'$in': [1,2,3]}
            # TODO: not sure how to deal with them right now, will throw
            msg = 'RunSummary does not support value %s for key=%s' \
                % (value, key)
            raise Exception(msg)
    if args == self.params: # no parameter is provided
        args['TIME_END'] = convert_datetime(time.time())
        args['TIME_BEGIN'] = convert_datetime(time.time() - 24*60*60)
    key, cert = get_key_cert()
    debug = 0
    if self.verbose > 1:
        debug = 1
    try:
        time0 = time.time()
        api = self.map.keys()[0] # we only register 1 API
        url = self.map[api]['url']
        expire = self.map[api]['expire']
        msg = 'DASAbstractService::%s::getdata(%s, %s)' \
            % (self.name, url, args)
        self.logger.info(msg)
        data = get_data(run_summary_url(url, args), key, cert, debug)
        genrows = self.parser(data, api)
        ctime = time.time() - time0
        self.write_to_cache(\
            dasquery, expire, url, api, args, genrows, ctime)
    except:
        traceback.print_exc()
        msg = 'Fail to process: url=%s, api=%s, args=%s' \
            % (url, api, args)
        self.logger.warning(msg)
def apicall(self, dasquery, url, api, args, dformat, expire):
    """
    Invoke DBS API to execute given query.
    Return results as a list of dict, e.g.
    [{'run': 1, 'dataset': '/a/b/c'}, ...]
    """
    # translate selection keys into ones data-service APIs provides
    cond = dasquery.mongo_query['spec']
    args = dict(self.params)
    for key, value in cond.items():
        if not isinstance(value, dict): # we got equal condition
            if key == 'date':
                if isinstance(value, list) and len(value) != 2:
                    msg = 'RunSummary service requires 2 time stamps. '
                    msg += 'Please use either date last XXh format or '
                    msg += 'date in YYYYMMDD-YYYYMMDD'
                    raise Exception(msg)
                args['TIME_BEGIN'] = convert_datetime(value[0])
                args['TIME_END'] = convert_datetime(value[1])
            else:
                for param in self.dasmapping.das2api(self.name, api, key):
                    args[param] = value
        elif key == 'run.number' or key == 'run.run_number':
            minrun = None
            maxrun = None
            for oper, val in value.items():
                if oper == '$in':
                    minrun = int(val[0])
                    maxrun = int(val[-1])
                elif oper == '$lt':
                    maxrun = int(val) - 1
                elif oper == '$lte':
                    maxrun = int(val)
                elif oper == '$gt':
                    minrun = int(val) + 1
                elif oper == '$gte':
                    minrun = int(val)
                else:
                    msg = 'RunSummary does not support operator %s' % oper
                    raise Exception(msg)
            args['RUN_BEGIN'] = minrun
            args['RUN_END'] = maxrun
        elif key == 'date' and '$in' in value and \
            len(value['$in']) == 2:
            date1, date2 = value['$in']
            args['TIME_BEGIN'] = convert_datetime(date1)
            args['TIME_END'] = convert_datetime(date2)
        else:
            # we got some operator, e.g. key: {'$in': [1,2,3]}
            # TODO: not sure how to deal with them right now, will throw
            msg = 'RunSummary does not support value %s for key=%s' \
                % (value, key)
            raise Exception(msg)
    if args == self.params: # no parameter is provided
        args['TIME_END'] = convert_datetime(time.time())
        args['TIME_BEGIN'] = convert_datetime(time.time() - 24*60*60)
    key, cert = get_key_cert()
    debug = 0
    if self.verbose > 1:
        debug = 1
    try:
        time0 = time.time()
        api = list(self.map.keys())[0] # we only register 1 API
        url = self.map[api]['url']
        expire = self.map[api]['expire']
        msg = 'DASAbstractService::%s::getdata(%s, %s)' \
            % (self.name, url, args)
        self.logger.info(msg)
        data = get_data(run_summary_url(url, args), key, cert, debug)
        genrows = self.parser(data, api)
        ctime = time.time() - time0
        self.write_to_cache(\
            dasquery, expire, url, api, args, genrows, ctime)
    except:
        traceback.print_exc()
        msg = 'Fail to process: url=%s, api=%s, args=%s' \
            % (url, api, args)
        self.logger.warning(msg)
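# Worked example of the run-range translation above, extracted into a
# standalone helper for clarity: a mongo-style condition {'$gte': 100,
# '$lte': 200} maps to RUN_BEGIN=100, RUN_END=200, while strict bounds
# {'$gt': 100, '$lt': 200} shift by one to RUN_BEGIN=101, RUN_END=199.
def run_range(value):
    """Sketch of the operator handling in apicall above"""
    minrun = maxrun = None
    for oper, val in value.items():
        if oper == '$in':
            minrun, maxrun = int(val[0]), int(val[-1])
        elif oper == '$lt':
            maxrun = int(val) - 1
        elif oper == '$lte':
            maxrun = int(val)
        elif oper == '$gt':
            minrun = int(val) + 1
        elif oper == '$gte':
            minrun = int(val)
    return minrun, maxrun

assert run_range({'$gt': 100, '$lt': 200}) == (101, 199)
assert run_range({'$gte': 100, '$lte': 200}) == (100, 200)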