def configs(url, args, verbose=False):
    """Find config info in ReqMgr"""
    headers = {'Accept': 'application/json;text/json'}
    dataset = args.get('dataset', None)
    if not dataset:
        return
    base = 'https://%s' % url.split('/')[2]
    ids = findReqMgrIds(dataset, base, verbose)
    # probe to find configs in showWorkload
    urls = ['%s/reqmgr/view/showWorkload?requestName=%s' % (base, i) \
            for i in ids]
    gen = urlfetch_getdata(urls, CKEY, CERT, headers)
    config_urls = []
    for row in gen:
        if 'error' not in row:
            for line in row['data'].split('\n'):
                if line.rfind("/configFile") != -1:
                    cfg = line.split('=')[-1].strip()
                    cfg = cfg.replace('<br/>', '').replace("'", '')
                    config_urls.append(cfg)
    if config_urls:
        urls = config_urls
    else:
        urls = ['%s/%s/configFile' % (url, i) for i in ids]
    gen = urlfetch_getdata(urls, CKEY, CERT, headers)
    for row in gen:
        if 'error' in row:
            error = row.get('error')
            reason = row.get('reason', '')
            yield {'error': error, 'reason': reason}
        else:
            config = {'data': row['data'], 'dataset': dataset,
                      'name': 'ReqMgr', 'ids': ids, 'urls': urls}
            yield {'config': config}
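# A minimal usage sketch for the ReqMgr flavor of configs() above; the
# service URL and dataset name are illustrative, and CKEY/CERT plus
# findReqMgrIds are assumed to be defined elsewhere in this module:
#
#     url = 'https://cmsweb.cern.ch/reqmgr/reqMgr'
#     for row in configs(url, {'dataset': '/Prim/Proc-v1/GEN-SIM-RECO'}):
#         print(row)  # either {'config': {...}} or {'error': ..., 'reason': ...}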
def files4site(phedex_url, files, site):
    "Find site for given files"
    params = {}
    if site and phedex_node_pattern.match(site):
        if not site.endswith('*'):
            # append wildcard to match site names w/ or w/o _Buffer/_MSS suffix
            site += '*'
        params.update({'node': site})
    elif site and se_pattern.match(site):
        params.update({'se': site})
    else:
        return
    sname = urllib.urlencode(params)
    urls = []
    for fname in files:
        url = '%s?lfn=%s&%s' % (phedex_url, fname, sname)
        urls.append(url)
    tags = 'block.replica.node'
    prim_key = 'block'
    gen = urlfetch_getdata(urls, CKEY, CERT)
    for rec in gen:
        if 'error' in rec:
            yield rec
        else:
            # convert record string into StringIO for xml_parser
            source = StringIO.StringIO(rec['data'])
            for row in xml_parser(source, prim_key, tags):
                fobj = row['block']['file']
                fname = fobj['name']
                replica = fobj['replica']
                for item in replica:
                    # yield the LFN once per replica at the requested site
                    yield fname
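# Hypothetical usage of files4site(); the PhEDEx datasvc URL pattern is the
# standard one, but the LFN and site name below are made up, and
# phedex_node_pattern/se_pattern are assumed module-level compiled regexps:
#
#     phedex_url = 'https://cmsweb.cern.ch/phedex/datasvc/xml/prod/fileReplicas'
#     lfns = ['/store/data/Run2012A/Prim/AOD/v1/0000/file.root']
#     for lfn in files4site(phedex_url, lfns, 'T1_US_FNAL'):
#         print(lfn)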
def configs(url, args, verbose=False):
    """Find config info in ReqMgr2"""
    headers = {'Accept': 'application/json;text/json'}
    dataset = args.get('dataset', None)
    if not dataset:
        return
    base = 'https://%s' % url.split('/')[2]
    idict, source = findReqMgrIds(dataset, base, verbose)
    ids = []
    ids_types = {} # keep track of ids/types relationship
    for key, ilist in idict.items():
        rtype = 'output' if key.lower().find('output') != -1 else 'input'
        for item in ilist:
            ids.append(item)
            ids_types[item] = rtype
    # for hash ids find configs via ReqMgr2 REST API
    urls = [rurl(base, i) for i in ids if i and len(i) == 32]
    # for non-hash ids probe to find configs in showWorkload
    req_urls = ['%s/couchdb/reqmgr_workload_cache/%s' % (base, i) \
            for i in ids if i and len(i) != 32]
    if req_urls:
        gen = urlfetch_getdata(req_urls, CKEY, CERT, headers)
        config_urls = []
        for row in gen:
            if 'error' not in row:
                url = row['url']
                # find the request type of the id embedded in this url;
                # rtype keeps the matched value after the break
                for key, rtype in ids_types.items():
                    if key in url:
                        break
                rdict = json.loads(row['data'])
                for key in rdict.keys():
                    val = rdict[key]
                    if key.endswith('ConfigCacheID'):
                        if isinstance(val, basestring):
                            config_urls.append(rurl(base, val))
                            ids_types[val] = rtype
                    elif isinstance(val, dict):
                        for kkk in val.keys():
                            if kkk.endswith('ConfigCacheID'):
                                vvv = val[kkk]
                                if isinstance(vvv, basestring):
                                    config_urls.append(rurl(base, vvv))
                                    ids_types[vvv] = rtype
        if config_urls:
            urls += config_urls
    # group config urls by request type
    udict = {}
    for rid, rtype in ids_types.items():
        for url in set(urls):
            if str(rid) in str(url):
                udict.setdefault(rtype, []).append(url)
    config = {'dataset': dataset, 'name': source, 'urls': udict,
              'ids': ids, 'idict': idict}
    yield {'config': config}
def configs(url, args, verbose=False):
    """Find config info in ReqMgr"""
    headers = {'Accept': 'application/json;text/json'}
    dataset = args.get('dataset', None)
    if not dataset:
        return
    base = 'https://%s' % url.split('/')[2]
    idict, source = findReqMgrIds(dataset, base, verbose)
    ids = []
    ids_types = {} # keep track of ids/types relationship
    for key, ilist in idict.items():
        rtype = 'output' if key.lower().find('output') != -1 else 'input'
        for item in ilist:
            ids.append(item)
            ids_types[item] = rtype
    # for hash ids find configs via ReqMgr REST API
    urls = [rurl(base, i) for i in ids if len(i) == 32]
    # for non-hash ids probe to find configs in showWorkload
    req_urls = ['%s/couchdb/reqmgr_workload_cache/%s' % (base, i) \
            for i in ids if len(i) != 32]
    if req_urls:
        gen = urlfetch_getdata(req_urls, CKEY, CERT, headers)
        config_urls = []
        for row in gen:
            if 'error' not in row:
                url = row['url']
                for key, rtype in ids_types.items():
                    if key in url:
                        break
                rdict = json.loads(row['data'])
                for key in rdict.keys():
                    val = rdict[key]
                    if key.endswith('ConfigCacheID'):
                        if isinstance(val, basestring):
                            config_urls.append(rurl(base, val))
                            ids_types[val] = rtype
                    elif isinstance(val, dict):
                        for kkk in val.keys():
                            if kkk.endswith('ConfigCacheID'):
                                vvv = val[kkk]
                                if isinstance(vvv, basestring):
                                    config_urls.append(rurl(base, vvv))
                                    ids_types[vvv] = rtype
        if config_urls:
            urls += config_urls
    udict = {}
    for rid, rtype in ids_types.items():
        for url in set(urls):
            if rid in url:
                udict.setdefault(rtype, []).append(url)
    config = {'dataset': dataset, 'name': source, 'urls': udict,
              'ids': ids, 'idict': idict}
    yield {'config': config}
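# Usage sketch shared by the two ReqMgr2/ReqMgr flavors of configs() directly
# above: each yields a single record whose 'urls' field groups config URLs by
# request type ('input'/'output'). Service URL and dataset are illustrative:
#
#     url = 'https://cmsweb.cern.ch/reqmgr2'
#     for row in configs(url, {'dataset': '/Prim/Proc-v1/AODSIM'}):
#         print(row['config']['urls'])  # e.g. {'input': [...], 'output': [...]}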
def phedex_info(urls, dbsdata):
    "Get phedex info for given set of dbs data"
    # create list of URLs for urlfetch
    url = urls.get('phedex') + '/blockReplicas'
    urls = ('%s?dataset=%s' % (url, d) for d in dbsdata.keys())
    headers = {'Accept': 'application/json;text/json'}
    gen = urlfetch_getdata(urls, CKEY, CERT, headers)
    for ddict in gen:
        try:
            jsondict = json.loads(ddict['data'])
        except Exception as _exc:
            continue
        rec = {}
        for blk in jsondict['phedex']['block']:
            dataset = blk['name'].split('#')[0]
            if 'nfiles' not in rec:
                nfiles = blk['files']
                size = blk['bytes']
            else:
                nfiles = rec['nfiles'] + blk['files']
                size = rec['size'] + blk['bytes']
            rec.update({'nfiles': nfiles, 'size': size})
            for rep in blk['replica']:
                if 'site' not in rec:
                    rec = dict(dataset=dataset, nfiles=nfiles, size=size,
                               site=[rep['node']], se=[rep['se']],
                               custodial=[rep['custodial']])
                    rec.update(dbsdata[dataset])
                else:
                    sites = rec['site']
                    ses = rec['se']
                    custodial = rec['custodial']
                    if rep['node'] not in sites:
                        sites.append(rep['node'])
                        ses.append(rep['se'])
                        custodial.append(rep['custodial'])
                    rec.update({'site': sites, 'se': ses,
                                'custodial': custodial})
        if rec.get('site'):
            # unwrap the site/se/custodial lists and yield records w/ their
            # individual values; skip records w/o any replica information
            for idx in range(0, len(rec['site'])):
                sename = rec['se'][idx]
                site = rec['site'][idx]
                custodial = rec['custodial'][idx]
                newrec = dict(rec)
                newrec['se'] = sename
                newrec['site'] = site
                newrec['custodial'] = custodial
                yield newrec
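# Sketch of phedex_info() usage: dbsdata maps dataset names to DBS records
# whose fields get merged into every yielded replica record, and one record
# is yielded per (site, se, custodial) triple. Values are hypothetical:
#
#     urls = {'phedex': 'https://cmsweb.cern.ch/phedex/datasvc/json/prod'}
#     dbsdata = {'/Prim/Proc-v1/AOD': {'nevents': 1000, 'status': 'VALID'}}
#     for row in phedex_info(urls, dbsdata):
#         print(row)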
def file_run_lumis(url, blocks, runs=None, valid=None, verbose=0):
    """
    Find file, run, lumi tuple for given set of files and (optional) runs.
    """
    headers = {'Accept': 'application/json;text/json'}
    urls = []
    for blk in blocks:
        if not blk:
            continue
        dbs_url = '%s/filelumis/?block_name=%s' % (url, urllib.quote(blk))
        if valid:
            dbs_url += '&validFileOnly=1'
        if runs:
            dbs_url += "&run_num=%s" % urllib.quote(str(runs))
        urls.append(dbs_url)
    if not urls:
        return
    if verbose > 1:
        print("\nDEBUG: file_run_lumis")
        print(urls)
    gen = urlfetch_getdata(urls, CKEY, CERT, headers)
    odict = {} # output dict
    for rec in gen:
        if 'error' in rec:
            error = rec.get('error')
            reason = rec.get('reason', '')
            yield {'error': error, 'reason': reason}
        else:
            for row in json.loads(rec['data']):
                run = row['run_num']
                lfn = row['logical_file_name']
                lumilist = row['lumi_section_num']
                eventlist = row.get('event_count', [])
                key = (lfn, run)
                for idx, lumi in enumerate(lumilist):
                    if len(eventlist) > 0:
                        evts = eventlist[idx]
                    else:
                        evts = None
                    odict.setdefault(key, []).append((lumi, evts))
    for key, values in odict.items():
        lfn, run = key
        lumis = []
        evts = []
        for lumi, evt in values:
            lumis.append(lumi)
            evts.append(evt)
        yield lfn, run, lumis, evts
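# Usage sketch for file_run_lumis(); error records are yielded as dicts, so
# callers should test for them before unpacking the (lfn, run, lumis, evts)
# tuples. The DBS instance URL is the standard one; the block name is made up:
#
#     dbs = 'https://cmsweb.cern.ch/dbs/prod/global/DBSReader'
#     for row in file_run_lumis(dbs, ['/Prim/Proc-v1/AOD#abcd-1234']):
#         if isinstance(row, dict):
#             print(row)  # error record
#         else:
#             lfn, run, lumis, evts = row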
def block_summary(dbs, blocks):
    "Get block summary information for given set of blocks"
    headers = {"Accept": "text/json;application/json"}
    url = dbs + "/blocksummaries"
    urls = ["%s/?block_name=%s" % (url, urllib.quote(b)) for b in blocks]
    res = urlfetch_getdata(urls, CKEY, CERT, headers)
    for row in res:
        if "error" in row:
            error = row.get("error")
            reason = row.get("reason", "")
            yield {"error": error, "reason": reason}
            continue
        url = row["url"]
        blk = urllib.unquote(url.split("=")[-1])
        for rec in json.loads(row["data"]):
            data = {"name": blk, "size": rec["file_size"],
                    "nfiles": rec["num_file"], "nevents": rec["num_event"]}
            yield dict(block=data)
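# Usage sketch for block_summary(), assuming a DBS3 reader instance; the
# block name is illustrative:
#
#     dbs = 'https://cmsweb.cern.ch/dbs/prod/global/DBSReader'
#     for row in block_summary(dbs, ['/Prim/Proc-v1/AOD#abcd-1234']):
#         print(row)  # {'block': {'name': ..., 'size': ..., 'nfiles': ..., 'nevents': ...}}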
def block_run_lumis(url, blocks, runs=None):
    """
    Find block, run, lumi tuple for given set of files and (optional) runs.
    """
    headers = {'Accept': 'text/xml'}
    urls = []
    for blk in blocks:
        if not blk:
            continue
        query = 'find block,run,lumi where block=%s' % blk
        if runs and isinstance(runs, list):
            val = ' or '.join(['run=%s' % r for r in runs])
            query += ' and (%s)' % val
        params = {'api': 'executeQuery', 'apiversion': 'DBS_2_0_9',
                  'query': query}
        dbs_url = url + '?' + urllib.urlencode(params)
        urls.append(dbs_url)
    if not urls:
        return
    gen = urlfetch_getdata(urls, CKEY, CERT, headers)
    prim_key = 'row'
    odict = {} # output dict
    for rec in gen:
        if 'error' in rec:
            error = rec.get('error')
            reason = rec.get('reason', '')
            print dastimestamp('DAS ERROR'), error, reason
            yield {'error': error, 'reason': reason}
        else:
            source = StringIO.StringIO(rec['data'])
            for row in qlxml_parser(source, prim_key):
                run = row['row']['run']
                blk = row['row']['block']
                lumi = row['row']['lumi']
                key = (blk, run)
                odict.setdefault(key, []).append(lumi)
    for key, lumis in odict.iteritems():
        blk, run = key
        yield blk, run, lumis
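# Sketch for the DBS2 flavor of block_run_lumis() above: url must point to a
# DBS2 QL (executeQuery) endpoint, which returns XML that qlxml_parser
# aggregates per (block, run). The endpoint and block name are hypothetical:
#
#     url = 'http://some-dbs2-host/servlet/DBSServlet'  # hypothetical endpoint
#     for row in block_run_lumis(url, ['/Prim/Proc-v1/RAW#abcd'], runs=[180000]):
#         print(row)  # (block, run, [lumi, ...]) or an error dict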
def block_run_lumis(url, blocks, runs=None, verbose=0):
    """
    Find block, run, lumi tuple for given set of files and (optional) runs.
    """
    headers = {'Accept': 'application/json;text/json'}
    urls = []
    for blk in blocks:
        if not blk:
            continue
        dbs_url = '%s/filelumis/?block_name=%s' % (url, urllib.quote(blk))
        if runs and isinstance(runs, list):
            # constrain the look-up to the given runs
            dbs_url += '&run_num=%s' % urllib.quote(str(runs))
        urls.append(dbs_url)
    if not urls:
        return
    if verbose > 1:
        print("\nDEBUG: block_run_lumis")
        print(urls)
    gen = urlfetch_getdata(urls, CKEY, CERT, headers)
    odict = {} # output dict
    for rec in gen:
        blk = urllib.unquote(url_args(rec['url'])['block_name'])
        if 'error' in rec:
            error = rec.get('error')
            reason = rec.get('reason', '')
            yield {'error': error, 'reason': reason}
        else:
            for row in json.loads(rec['data']):
                run = row['run_num']
                lumilist = row['lumi_section_num']
                key = (blk, run)
                for lumi in lumilist:
                    odict.setdefault(key, []).append(lumi)
    for key, lumis in odict.items():
        blk, run = key
        yield blk, run, lumis
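# Usage sketch for the DBS3 flavor of block_run_lumis(); same call shape as
# the DBS2 version but against the /filelumis REST API. The block name and
# run number are illustrative:
#
#     dbs = 'https://cmsweb.cern.ch/dbs/prod/global/DBSReader'
#     for row in block_run_lumis(dbs, ['/Prim/Proc-v1/AOD#abcd-1234'], runs=[190456]):
#         print(row)  # (block, run, [lumi, ...]) or an error dict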
def blocks4tier_date(dbs, tier, min_cdate, max_cdate, verbose=0):
    "Get list of blocks for given parameters"
    headers = {'Accept': 'text/json;application/json'}
    url = dbs + "/blocks"
    params = {'data_tier_name': tier,
              'min_cdate': min_cdate, 'max_cdate': max_cdate}
    urls = ['%s?%s' % (url, urllib.urlencode(params))]
    if verbose > 1:
        print("\nblocks4tier_date")
        print(urls)
    res = process(urlfetch_getdata(urls, CKEY, CERT, headers))
    err = 'Unable to get blocks for tier=%s, mindate=%s, maxdate=%s' \
            % (tier, min_cdate, max_cdate)
    for blist in res:
        if 'error' in blist:
            yield blist
            continue
        if isinstance(blist, dict):
            if 'block_name' not in blist:
                msg = err + ', reason=%s' % json.dumps(blist)
                raise Exception(msg)
        for row in blist:
            yield row['block_name']
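# Usage sketch for blocks4tier_date(); min_cdate/max_cdate are creation-time
# bounds passed straight to the DBS3 blocks API (UNIX timestamps, if memory
# serves). The values below are illustrative:
#
#     dbs = 'https://cmsweb.cern.ch/dbs/prod/global/DBSReader'
#     for blk in blocks4tier_date(dbs, 'GEN-SIM', 1356998400, 1359676800):
#         print(blk)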
def summary4dataset_run(url, kwds):
    "Helper function to deal with summary dataset=/a/b/c requests"
    urls = []
    cond = ''
    val = kwds.get('run', 'optional')
    if val != 'optional':
        if isinstance(val, dict):
            min_run = 0
            max_run = 0
            if '$lte' in val:
                max_run = val['$lte']
            if '$gte' in val:
                min_run = val['$gte']
            if min_run and max_run:
                val = "run >= %s and run <= %s" % (min_run, max_run)
            elif '$in' in val:
                val = ' or '.join(['run=%s' % r for r in val['$in']])
                val = '(%s)' % val
        elif isinstance(val, int):
            val = "run=%d" % val
        cond += ' and %s' % val
    val = kwds.get('dataset', None)
    if val and val != 'optional':
        cond += ' and dataset=%s' % val
    val = kwds.get('block', None)
    if val and val != 'optional':
        cond += ' and block=%s' % val
    # strip the leading ' and' from the accumulated condition
    query = "find file, file.size, file.numevents where " + cond[4:]
    params = {'api': 'executeQuery', 'apiversion': 'DBS_2_0_9',
              'query': query}
    url1 = url + '?' + urllib.urlencode(params)
    urls.append(url1)
    query = "find run, count(lumi) where " + cond[4:]
    params = {'api': 'executeQuery', 'apiversion': 'DBS_2_0_9',
              'query': query}
    url2 = url + '?' + urllib.urlencode(params)
    urls.append(url2)
    headers = {'Accept': 'text/xml'}
    gen = urlfetch_getdata(urls, CKEY, CERT, headers)
    tot_size = 0
    tot_evts = 0
    tot_lumis = 0
    tot_files = 0
    for rec in gen:
        if 'error' in rec:
            error = rec.get('error')
            reason = rec.get('reason', '')
            srec = {'summary': '', 'error': error, 'reason': reason}
            yield srec
            continue # error records carry no data payload
        url = rec['url']
        data = rec['data']
        stream = StringIO.StringIO(data)
        if url.find('file') != -1:
            prim_key = 'file'
        else:
            prim_key = 'run'
        for row in qlxml_parser(stream, prim_key):
            if prim_key == 'file':
                fdata = row['file']
                tot_size += fdata['file.size']
                tot_evts += fdata['file.numevents']
                tot_files += 1
            else:
                fdata = row['run']
                tot_lumis += fdata['count_lumi']
    srec = {'summary': {'file_size': tot_size, 'nevents': tot_evts,
                        'nlumis': tot_lumis, 'nfiles': tot_files}}
    yield srec
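# Usage sketch for summary4dataset_run(); kwds mirrors DAS query conditions,
# so run may be an int, a {'$gte': ..., '$lte': ...} range, or an
# {'$in': [...]} list. The endpoint name and values are hypothetical:
#
#     kwds = {'dataset': '/Prim/Proc-v1/AOD', 'run': {'$in': [190456, 190457]}}
#     for row in summary4dataset_run(dbs2_url, kwds):  # dbs2_url: a DBS2 QL endpoint
#         print(row['summary'])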