def getdata_helper(self, url, params, expire, headers=None, post=None):
    "Helper function to get data from SiteDB or local cache"
    cname = url.split('/')[-1].replace('-', '_')
    col = self.localcache.conn[self.name][cname]
    local = col.find_one({'expire': {'$gt': expire_timestamp(time.time())}})
    data = None
    if local:
        msg = 'SiteDBService reads from %s.%s' % (self.name, cname)
        self.logger.info(msg)
        try: # get data from local cache
            data = [r for r in col.find() if 'expire' not in r][0]
            del data['_id']
        except Exception as exc:
            print_exc(exc)
            data = {}
    if not data or not local:
        headers = {'Accept': 'application/json'}
        datastream, expire = getdata(url, params, headers, expire, post,
                                     self.error_expire, self.verbose,
                                     self.ckey, self.cert, system=self.name)
        try: # read data and write it to local cache
            data = json.load(datastream)
            datastream.close()
            col.remove()
            col.insert(data)
            col.insert({'expire': expire_timestamp(expire)})
        except Exception as exc:
            print_exc(exc)
    return data, expire
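# Design note on the cache layout used by getdata_helper above (a reading of
# the code, not an authoritative spec): each per-API collection stores the
# payload documents plus one sentinel document of the form
# {'expire': <absolute timestamp>}. The query
#
#     col.find_one({'expire': {'$gt': expire_timestamp(time.time())}})
#
# returns the sentinel only while its timestamp is still in the future, so a
# truthy result means the cache is fresh; once it expires, the collection is
# dropped and repopulated from the remote service.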
def datasets_dbs3(self):
    """
    Retrieve a list of DBS datasets (DBS3)
    """
    params = {"dataset_access_type": "VALID"}
    encoded_data = urllib.urlencode(params, doseq=True)
    url = self.dbs_url + "/datasets?" + encoded_data
    req = urllib2.Request(url)
    ckey, cert = get_key_cert()
    handler = HTTPSClientAuthHandler(ckey, cert)
    opener = urllib2.build_opener(handler)
    urllib2.install_opener(opener)
    stream = urllib2.urlopen(req)
    gen = json.load(stream)
    for row in gen:
        dataset = row["dataset"]
        rec = {"dataset": dataset}
        if self.write_hash:
            storage_query = {
                "fields": ["dataset"],
                "spec": [{"key": "dataset.name",
                          "value": '"%s"' % dataset}],
                "instance": self.dbcoll,
            }
            rec.update({"qhash": genkey(storage_query)})
        yield rec
    stream.close()
def fetch_values(self):
    """
    Fetch the data from providers and select the final values
    with jsonpath rules
    """
    # use grid-proxy for authentication
    ckey, cert = get_key_cert()
    handler = HTTPSClientAuthHandler(ckey, cert)
    opener = urllib2.build_opener(handler)
    urllib2.install_opener(opener)
    # request list of possible values
    params = {}
    encoded_data = urllib.urlencode(params, doseq=True)
    service = self.cfg
    url = service['url'] + encoded_data
    print(str(url))
    req = urllib2.Request(url)
    # ensure we get json (sitedb is messed up and randomly returns xml)
    if service['jsonpath_selector']:
        req.add_header('Accept', 'application/json')
    stream = urllib2.urlopen(req)
    if service['jsonpath_selector']:
        response = json.load(stream)
        jsonpath_expr = parse(service['jsonpath_selector'])
        results = jsonpath_expr.find(response)
        stream.close()
        return ({'value': v.value} for v in results)
    return []
def worker_v3(url, query):
    """
    Query RunRegistry service, see documentation at
    https://twiki.cern.ch/twiki/bin/viewauth/CMS/DqmRrApi
    url=http://runregistry.web.cern.ch/runregistry/
    """
    workspace = 'GLOBAL'
    table = 'runsummary'
    template = 'json'
    columns = ['number', 'startTime', 'stopTime', 'triggers',
               'runClassName', 'runStopReason', 'bfield', 'gtKey',
               'l1Menu', 'hltKeyDescription', 'lhcFill', 'lhcEnergy',
               'runCreated', 'modified', 'lsCount', 'lsRanges']
    sdata = json.dumps({'filter': query})
    path = 'api/%s/%s/%s/%s/none/data' \
            % (workspace, table, template, urllib.quote(','.join(columns)))
    callurl = os.path.join(url, path)
    result = urllib.urlopen(callurl, sdata)
    record = json.load(result)
    result.close()
    notations = {'lsRanges': 'lumi_section_ranges', 'number': 'run_number',
                 'runCreated': 'create_time', 'stopTime': 'end_time',
                 'startTime': 'start_time', 'lsCount': 'lumi_sections',
                 'runStopReason': 'stop_reason',
                 'hltKeyDescription': 'hltkey', 'gtKey': 'gtkey',
                 'lhcEnergy': 'beam_e', 'l1Menu': 'l1key',
                 'modified': 'modify_time', 'runClassName': 'group_name'}
    for rec in record:
        for key, val in rec.items():
            if key in notations:
                rec[notations[key]] = val
                del rec[key]
        yield dict(run=rec)
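def example_worker_v3(url):
    """
    A minimal usage sketch for worker_v3, not part of the original module.
    The filter value below is an illustrative assumption about RunRegistry
    query syntax; worker_v3 yields one {'run': {...}} dict per matching run,
    with RunRegistry column names already mapped to DAS notations
    (e.g. number -> run_number).
    """
    query = {'number': '>= 160404 and <= 160405'} # hypothetical run range
    for row in worker_v3(url, query):
        print(row['run'].get('run_number'))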
def dataset_info(urls, datasetdict, verbose=0):
    """
    Request blockReplicas information from Phedex for a given dataset
    or a list of datasets (use a POST request in the latter case).
    Update MongoDB with aggregated information about dataset:
    site, size, nfiles, nblocks.
    """
    url = urls.get('phedex') + '/blockReplicas'
    params = {'dataset': [d for d in datasetdict.keys()]}
    headers = {'Accept': 'application/json;text/json'}
    data, _ = getdata(url, params, headers, post=True,
                      ckey=CKEY, cert=CERT, verbose=verbose,
                      system='dbs_phedex')
    if isinstance(data, basestring): # no response
        dastimestamp('DBS_PHEDEX ERROR: %s' % data)
        return
    jsondict = json.load(data)
    data.close()
    for row in jsondict['phedex']['block']:
        dataset = row['name'].split('#')[0]
        for rep in row['replica']:
            rec = dict(dataset=dataset, nfiles=row['files'],
                       size=row['bytes'], site=rep['node'],
                       se=rep['se'], custodial=rep['custodial'])
            rec.update(datasetdict[dataset])
            yield rec
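def example_dataset_info_payload():
    """
    Illustrative sketch, not part of the original module: the shape of the
    Phedex blockReplicas payload that dataset_info above expects, inferred
    from the keys it reads. The block name, counts, and site below are made
    up; each replica of each block becomes one flattened record.
    """
    jsondict = {'phedex': {'block': [
        {'name': '/Prim/Proc/TIER#123', 'files': 10, 'bytes': 1024,
         'replica': [{'node': 'T1_XX_Site', 'se': 'se.example.com',
                      'custodial': 'y'}]}]}}
    for row in jsondict['phedex']['block']:
        dataset = row['name'].split('#')[0] # strip the block suffix
        for rep in row['replica']:
            yield dict(dataset=dataset, nfiles=row['files'],
                       size=row['bytes'], site=rep['node'],
                       se=rep['se'], custodial=rep['custodial'])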
def run_lumis_dbs(url, dataset, ckey, cert):
    "Retrieve list of run/lumis from DBS for a given dataset"
    res = {} # output result
    api_url = url + '/blocks'
    params = {'dataset': dataset}
    data, _ = getdata(api_url, params, ckey=ckey, cert=cert,
                      system='combined')
    for row in json.load(data):
        api_url = url + '/filelumis'
        params = {'block_name': row['block_name']}
        data, _ = getdata(api_url, params, ckey=ckey, cert=cert,
                          system='combined')
        for rec in json.load(data):
            run = rec['run_num']
            lumi = rec['lumi_section_num']
            res.setdefault(run, []).append(lumi)
    return res
def runs_dbs(url, dataset, ckey, cert):
    "Retrieve list of runs from DBS2 for a given dataset"
    api_url = url + '/runs'
    params = {'dataset': dataset}
    data, _ = getdata(api_url, params, ckey=ckey, cert=cert,
                      system='combined')
    for row in json.load(data):
        run = row['run']['run_num']
        yield run
def parse_data(data):
    """
    Helper to parse input data
    """
    for item in json.load(data):
        if isinstance(item, list):
            for row in item:
                yield row
        else:
            yield item
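def example_parse_data():
    """
    Illustrative sketch, not part of the original module: parse_data flattens
    a JSON stream whose top-level list may mix plain records with nested
    lists of records. Assumes the StringIO module is available (it is already
    used by the second parse_data variant below).
    """
    stream = StringIO.StringIO('[{"a": 1}, [{"b": 2}, {"c": 3}]]')
    rows = list(parse_data(stream))
    # rows == [{'a': 1}, {'b': 2}, {'c': 3}]
    return rows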
def datasets_dbs3(self):
    """
    Retrieve a list of DBS datasets (DBS3)
    """
    params = {'dataset_access_type': 'PRODUCTION'}
    encoded_data = urllib.urlencode(params, doseq=True)
    url = self.dbs_url + '/datasets?' + encoded_data
    req = urllib2.Request(url)
    ckey, cert = get_key_cert()
    handler = HTTPSClientAuthHandler(ckey, cert)
    opener = urllib2.build_opener(handler)
    urllib2.install_opener(opener)
    stream = urllib2.urlopen(req)
    gen = json.load(stream)
    for row in gen:
        yield row
    stream.close()
def parse_data(data):
    """
    Helper to parse input data
    """
    if isinstance(data, basestring):
        data = StringIO.StringIO(data)
    try:
        jsondata = json.load(data)
    except Exception as exc:
        jsondata = []
        msg = 'Unable to apply json.load to "%s"' % data
        print(msg)
    if isinstance(jsondata, dict):
        yield jsondata
    elif isinstance(jsondata, list):
        for row in jsondata:
            yield row
def datasets_dbs(urls, verbose=0):
    """DBS3 implementation of datasets function"""
    headers = {'Accept': 'application/json;text/json'}
    url = urls.get('dbs3') + '/datasets'
    params = {'detail': 'True', 'dataset_access_type': 'VALID'}
    data, _ = getdata(url, params, headers, post=False, verbose=verbose,
                      ckey=CKEY, cert=CERT, doseq=False, system='dbs3')
    records = json.load(data)
    data.close()
    dbsdata = {}
    for row in records:
        if row['dataset'] not in dbsdata:
            dbsdata[row['dataset']] = \
                    dict(era=row['acquisition_era_name'],
                         tier=row['data_tier_name'], status='VALID')
    for row in phedex_info(urls, dbsdata):
        yield row
def worker_helper(url, query, table='runsummary'):
    """
    Query RunRegistry service, see documentation at
    https://twiki.cern.ch/twiki/bin/viewauth/CMS/DqmRrApi
    url=http://runregistry.web.cern.ch/runregistry/
    """
    workspace = 'GLOBAL'
    template = 'json'
    if table == 'runsummary':
        columns = ['number', 'startTime', 'stopTime', 'triggers',
                   'runClassName', 'runStopReason', 'bfield', 'gtKey',
                   'l1Menu', 'hltKeyDescription', 'lhcFill', 'lhcEnergy',
                   'runCreated', 'modified', 'lsCount', 'lsRanges']
    elif table == 'runlumis':
        columns = ['sectionFrom', 'sectionTo', 'runNumber']
    sdata = {'filter': query}
    path = 'api/%s/%s/%s/%s/none/data' \
            % (workspace, table, template, urllib.quote(','.join(columns)))
    callurl = os.path.join(url, path)
    result, _ = getdata(callurl, sdata, post=True)
    record = json.load(result)
    result.close()
    notations = {'lsRanges': 'lumi_section_ranges', 'number': 'run_number',
                 'runCreated': 'create_time', 'runNumber': 'run_number',
                 'stopTime': 'end_time', 'startTime': 'start_time',
                 'lsCount': 'lumi_sections', 'runStopReason': 'stop_reason',
                 'hltKeyDescription': 'hltkey', 'gtKey': 'gtkey',
                 'lhcEnergy': 'beam_e', 'l1Menu': 'l1key',
                 'modified': 'modify_time', 'runClassName': 'group_name'}
    for rec in record:
        for key, val in rec.items():
            if key in notations:
                rec[notations[key]] = val
                del rec[key]
        if table == 'runsummary':
            yield dict(run=rec)
        elif table == 'runlumis':
            if 'sectionTo' in rec and 'sectionFrom' in rec:
                rec['number'] = range(rec.pop('sectionFrom'),
                                      rec.pop('sectionTo') + 1)
            yield dict(lumi=rec)
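def example_lumi_expansion():
    """
    Illustrative sketch, not part of the original module: the runlumis branch
    of worker_helper above collapses each (sectionFrom, sectionTo) pair into
    an inclusive list of lumi section numbers. The record below is made up.
    """
    rec = {'run_number': 176304, 'sectionFrom': 1, 'sectionTo': 5}
    rec['number'] = range(rec.pop('sectionFrom'), rec.pop('sectionTo') + 1)
    # rec == {'run_number': 176304, 'number': [1, 2, 3, 4, 5]}
    return dict(lumi=rec)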
def json_parser(source, logger=None):
    """
    JSON parser based on json module. It accepts either source descriptor
    with .read()-supported file-like object or data as a string object.
    """
    if isinstance(source, InstanceType) or isinstance(source, file):
        # got data descriptor
        try:
            jsondict = json.load(source)
        except Exception as exc:
            print_exc(exc)
            source.close()
            raise
        source.close()
    else:
        data = source
        # to prevent unicode/ascii errors like
        # UnicodeDecodeError: 'utf8' codec can't decode byte 0xbf in position
        if isinstance(data, basestring):
            data = unicode(data, errors='ignore')
            res = data.replace('null', '"null"')
        elif isinstance(data, object) and hasattr(data, 'read'): # StringIO
            res = data.read()
        else:
            res = data
        try:
            jsondict = json.loads(res)
        except Exception:
            msg = "json_parser, WARNING: fail to JSON'ify data:"
            msg += "\n%s\ndata type %s" % (res, type(res))
            if logger:
                logger.warning(msg)
            else:
                print(msg)
            jsondict = eval(res, {"__builtins__": None}, {})
    yield jsondict
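def example_json_parser():
    """
    Illustrative sketch, not part of the original module: when json_parser
    receives string input it quotes bare JSON nulls before decoding, so a
    null value comes back as the literal string 'null' rather than None.
    """
    result = next(json_parser('{"site": null}'))
    # result == {'site': 'null'}
    return result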
def sitedb_parser(source):
    """SiteDB parser"""
    if isinstance(source, str) or isinstance(source, unicode):
        data = json.loads(source)
    elif isinstance(source, InstanceType) or isinstance(source, file):
        # got data descriptor
        try:
            data = json.load(source)
        except Exception as exc:
            print_exc(exc)
            source.close()
            raise
        source.close()
    else:
        data = source
    if not isinstance(data, dict):
        raise Exception('Wrong data type, %s' % type(data))
    if 'desc' in data:
        columns = data['desc']['columns']
        for row in data['result']:
            yield rowdict(columns, row)
    else:
        for row in data['result']:
            yield row
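def example_sitedb_parser():
    """
    Illustrative sketch, not part of the original module: SiteDB ships column
    names in a 'desc' block separate from the row values, and sitedb_parser
    pairs them back up via the rowdict helper (assumed here to zip column
    names with row values). The payload below is made up.
    """
    payload = {'desc': {'columns': ['username', 'email']},
               'result': [['alice', 'alice@example.com']]}
    # expected: [{'username': 'alice', 'email': 'alice@example.com'}]
    return [row for row in sitedb_parser(payload)]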
def dataset_info(urls, datasetdict, verbose=0):
    """
    Request blockReplicas information from Phedex for a given dataset
    or a list of datasets (use a POST request in the latter case).
    Update MongoDB with aggregated information about dataset:
    site, size, nfiles, nblocks.
    """
    url = urls.get('phedex')
    params = {'dataset': [d for d in datasetdict.keys()]}
    headers = {'Accept': 'application/json;text/json'}
    data, _ = getdata(url, params, headers, post=True, verbose=verbose)
    jsondict = json.load(data)
    data.close()
    for row in jsondict['phedex']['block']:
        name = row['name'].split('#')[0]
        for rep in row['replica']:
            rec = dict(name=name, nfiles=row['files'],
                       size=row['bytes'], site=rep['node'],
                       se=rep['se'], custodial=rep['custodial'])
            rec.update(datasetdict[name])
            yield rec
def datasets_dbs3(urls, verbose=0):
    """DBS3 implementation of datasets function"""
    headers = {'Accept': 'application/json;text/json'}
    url = urls.get('dbs')
    params = {'detail': 'True', 'dataset_access_type': 'PRODUCTION'}
    ckey, cert = get_key_cert()
    data, _ = getdata(url, params, headers, verbose=verbose,
                      ckey=ckey, cert=cert, doseq=False)
    records = json.load(data)
    data.close()
    data = {}
    size = 10 # size of each POST request batch to Phedex
    for row in records:
        if row['dataset'] not in data:
            data[row['dataset']] = \
                    dict(era=row['acquisition_era_name'],
                         tier=row['data_tier_name'])
        if len(data) > size:
            for rec in dataset_info(urls, data):
                yield rec
            data = {}
    if data:
        for rec in dataset_info(urls, data):
            yield rec
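def example_dataset_batches(records, size=10):
    """
    Illustrative sketch, not part of the original module: the same
    flush-every-N-datasets pattern used in datasets_dbs3 above, factored
    out. Grouping DBS records into small dicts keeps each Phedex
    blockReplicas POST payload bounded.
    """
    batch = {}
    for row in records:
        if row['dataset'] not in batch:
            batch[row['dataset']] = dict(era=row['acquisition_era_name'],
                                         tier=row['data_tier_name'])
        if len(batch) > size:
            yield batch
            batch = {}
    if batch:
        yield batch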