Example 1
 def getdata_helper(self, url, params, expire, headers=None, post=None):
     "Helper function to get data from SiteDB or local cache"
     cname = url.split('/')[-1].replace('-', '_')
     col   = self.localcache.conn[self.name][cname]
     local = col.find_one({'expire':{'$gt':expire_timestamp(time.time())}})
     data  = None
     if  local:
         msg = 'SiteDBService reads from %s.%s' % (self.name, cname)
         self.logger.info(msg)
         try: # get data from local cache
             data = [r for r in col.find() if 'expire' not in r][0]
             del data['_id']
         except Exception as exc:
             print_exc(exc)
             data = {}
     if  not data or not local:
         headers = {'Accept':'application/json'}
         datastream, expire = getdata(\
                 url, params, headers, expire, post,
                 self.error_expire, self.verbose, self.ckey, self.cert,
                 system=self.name)
         try: # read data and write it to local cache
             data = json.load(datastream)
             datastream.close()
             col.remove()
             col.insert(data)
             col.insert({'expire':expire_timestamp(expire)})
         except Exception as exc:
             print_exc(exc)
     return data, expire
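
The helper above caches the payload in MongoDB next to a separate {'expire': ...} marker document and treats a missing or stale marker as a cache miss. A minimal sketch of the same pattern without MongoDB (names here are illustrative, not from the source):

    import time

    CACHE = {}  # url -> {'data': payload, 'expire': unix timestamp}

    def cached_fetch(url, fetch, ttl=3600):
        "Return cached data for url, refetching via fetch() once it expires"
        entry = CACHE.get(url)
        if entry and entry['expire'] > time.time():
            return entry['data']  # cache hit, marker still valid
        data = fetch(url)         # cache miss or stale marker: refetch
        CACHE[url] = {'data': data, 'expire': time.time() + ttl}
        return data
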
Example 2
 def datasets_dbs3(self):
     """
     Retrieve a list of DBS datasets (DBS3)
     """
     params = {"dataset_access_type": "VALID"}
     encoded_data = urllib.urlencode(params, doseq=True)
     url = self.dbs_url + "/datasets?" + encoded_data
     req = urllib2.Request(url)
     ckey, cert = get_key_cert()
     handler = HTTPSClientAuthHandler(ckey, cert)
     opener = urllib2.build_opener(handler)
     urllib2.install_opener(opener)
     stream = urllib2.urlopen(req)
     gen = json.load(stream)
     for row in gen:
         dataset = row["dataset"]
         rec = {"dataset": dataset}
         if self.write_hash:
             storage_query = {
                 "fields": ["dataset"],
                 "spec": [{"key": "dataset.name", "value": '"%s"' % dataset}],
                 "instance": self.dbcoll,
             }
             rec.update({"qhash": genkey(storage_query)})
         yield rec
     stream.close()
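
The qhash above is produced by the project-specific genkey helper, which is not shown in this example. A common way to implement such a key (an assumption, not taken from the source) is to hash a deterministic serialization of the query:

    import hashlib
    import json

    def genkey_sketch(query):
        "Map a JSON-serializable query dict to a deterministic md5 key"
        # sort_keys keeps the serialization, and hence the hash, stable
        return hashlib.md5(json.dumps(query, sort_keys=True)).hexdigest()
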
Example 3
    def fetch_values(self):
        """ fetch the data from providers and select the final values
         with jsonpath rules """
        # use grid-proxy for authentication
        ckey, cert = get_key_cert()

        handler = HTTPSClientAuthHandler(ckey, cert)
        opener = urllib2.build_opener(handler)
        urllib2.install_opener(opener)

        # request list of possible values
        params = {}
        encoded_data = urllib.urlencode(params, doseq=True)

        service = self.cfg
        url = service['url'] + encoded_data
        print(str(url))
        req = urllib2.Request(url)

        # ensure we get json (sitedb is messed up and randomly returns xml)
        if service['jsonpath_selector']:
            req.add_header('Accept', 'application/json')
            #print req.get_full_url()

        stream = urllib2.urlopen(req)

        if service['jsonpath_selector']:
            response = json.load(stream)
            jsonpath_expr = parse(service['jsonpath_selector'])
            results = jsonpath_expr.find(response)
            stream.close()

            return ({'value': v.value} for v in results)

        return []
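
The parse() used above comes from a jsonpath library; assuming it is jsonpath_rw's parse, the selector mechanics can be checked on an in-memory document:

    from jsonpath_rw import parse  # assumed provider of parse()

    doc = {'result': [{'name': 'T1_US_FNAL'}, {'name': 'T2_CH_CERN'}]}
    expr = parse('result[*].name')
    # find() returns match objects; .value holds the selected datum
    print([match.value for match in expr.find(doc)])
    # ['T1_US_FNAL', 'T2_CH_CERN']
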
Example 4
def worker_v3(url, query):
    """
    Query RunRegistry service, see documentation at
    https://twiki.cern.ch/twiki/bin/viewauth/CMS/DqmRrApi
    url=http://runregistry.web.cern.ch/runregistry/
    """
    workspace = 'GLOBAL'
    table = 'runsummary'
    template = 'json'
    columns = ['number', 'startTime', 'stopTime', 'triggers',
               'runClassName', 'runStopReason', 'bfield', 'gtKey',
               'l1Menu', 'hltKeyDescription', 'lhcFill', 'lhcEnergy',
               'runCreated', 'modified', 'lsCount', 'lsRanges']
    sdata = json.dumps({'filter':query})
    path = 'api/GLOBAL/%s/%s/%s/none/data' \
                % (table, template, urllib.quote(','.join(columns)))
    callurl = os.path.join(url, path)
    result = urllib.urlopen(callurl, sdata)
    record = json.load(result)
    result.close()
    notations = {'lsRanges':'lumi_section_ranges',
            'number':'run_number', 'runCreated':'create_time',
            'stopTime': 'end_time', 'startTime': 'start_time',
            'lsCount': 'lumi_sections', 'runStopReason': 'stop_reason',
            'hltKeyDescription': 'hltkey', 'gtKey': 'gtkey',
            'lhcEnergy': 'beam_e', 'l1Menu': 'l1key',
            'modified': 'modify_time', 'runClassName': 'group_name'}
    for rec in record:
        for key, val in rec.items():
            if  key in notations:
                rec[notations[key]] = val
                del rec[key]
        yield dict(run=rec)
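
A hypothetical call, with an illustrative filter (the RunRegistry filter syntax is assumed here, not documented by this snippet):

    url = 'http://runregistry.web.cern.ch/runregistry/'
    # worker_v3 yields {'run': {...}} dicts with the renamed keys
    for row in worker_v3(url, {'number': '176304'}):
        print(row['run'].get('run_number'))
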
Example 5
def dataset_info(urls, datasetdict, verbose=0):
    """
    Request blockReplicas information from Phedex for a given
    dataset or a list of datasets (a POST request is used in the latter case).
    Update MongoDB with aggregated information about dataset:
    site, size, nfiles, nblocks.
    """
    url      = urls.get('phedex') + '/blockReplicas'
    params   = {'dataset': [d for d in datasetdict.keys()]}
    headers  = {'Accept':'application/json;text/json'}
    data, _  = getdata(url, params, headers, post=True, \
            ckey=CKEY, cert=CERT, verbose=verbose, system='dbs_phedex')
    if  isinstance(data, basestring): # no response
        dastimestamp('DBS_PHEDEX ERROR: %s' % data)
        return
    jsondict = json.load(data)
    data.close()
    for row in jsondict['phedex']['block']:
        dataset = row['name'].split('#')[0]
        for rep in row['replica']:
            rec = dict(dataset=dataset,
                        nfiles=row['files'],
                        size=row['bytes'],
                        site=rep['node'],
                        se=rep['se'],
                        custodial=rep['custodial'])
            rec.update(datasetdict[dataset])
            yield rec
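
Phedex block names embed the dataset name before the '#' separator, which is how the loop above groups block replicas by dataset:

    block = '/Primary/Processed-v1/AOD#abcd-1234'  # illustrative block name
    print(block.split('#')[0])  # /Primary/Processed-v1/AOD
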
Example 6
def run_lumis_dbs(url, dataset, ckey, cert):
    "Retrive list of run/lumis from DBS for a given dataset"
    res      = {} # output result
    api_url  = url + '/blocks'
    params   = {'dataset': dataset}
    data, _  = getdata(api_url, params, ckey=ckey, cert=cert, system='combined')
    for row in json.load(data):
        api_url = url + '/filelumis'
        params = {'block_name': row['block_name']}
        data, _  = \
            getdata(api_url, params, ckey=ckey, cert=cert, system='combined')
        for rec in json.load(data):
            run  = rec['run_num']
            lumi = rec['lumi_section_num']
            res.setdefault(run, []).append(lumi)
    return res
Example 7
def runs_dbs(url, dataset, ckey, cert):
    "Retrive list of run/lumis from DBS2 for a given dataset"
    api_url  = url + '/runs'
    params   = {'dataset': dataset}
    data, _  = getdata(api_url, params, ckey=ckey, cert=cert, system='combined')
    for row in json.load(data):
        run  = row['run']['run_num']
        yield run
Example 8
def parse_data(data):
    """
    Helper to parse input data
    """

    for item in json.load(data):
        if  isinstance(item, list):
            for row in item:
                yield row
        else:
            yield item
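
A quick in-memory check (Python 2 StringIO, matching the rest of these examples) shows the one-level flattening the helper performs:

    import StringIO

    stream = StringIO.StringIO('[[1, 2], 3]')
    print(list(parse_data(stream)))  # [1, 2, 3]
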
Example 9
 def datasets_dbs3(self):
     """
     Retrieve a list of DBS datasets (DBS3)
     """
     params = {'dataset_access_type':'PRODUCTION'}
     encoded_data = urllib.urlencode(params, doseq=True)
     url = self.dbs_url + '/datasets?' + encoded_data
     req = urllib2.Request(url)
     ckey, cert = get_key_cert()
     handler = HTTPSClientAuthHandler(ckey, cert)
     opener  = urllib2.build_opener(handler)
     urllib2.install_opener(opener)
     stream = urllib2.urlopen(req)
     gen = json.load(stream)
     for row in gen:
         yield row
     stream.close()
Example 10
def parse_data(data):
    """
    Helper to parse input data
    """
    if  isinstance(data, basestring):
        data = StringIO.StringIO(data)
    try:
        jsondata = json.load(data)
    except Exception as exc:
        jsondata = []
        msg = 'Unable to apply json.load to "%s", error: %s' % (data, str(exc))
        print(msg)
    if  isinstance(jsondata, dict):
        yield jsondata
    elif isinstance(jsondata, list):
        for row in jsondata:
            yield row
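
Unlike the variant in Example 8, this one also accepts a raw string and degrades to an empty result on malformed input:

    print(list(parse_data('{"a": 1}')))  # [{u'a': 1}]
    print(list(parse_data('not json')))  # warning printed, then []
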
Example 11
def datasets_dbs(urls, verbose=0):
    """DBS3 implementation of datasets function"""
    headers = {'Accept':'application/json;text/json'}
    url     = urls.get('dbs3') + '/datasets'
    params  = {'detail':'True', 'dataset_access_type':'VALID'}
    data, _ = getdata(url, params, headers, post=False, verbose=verbose,
                ckey=CKEY, cert=CERT, doseq=False, system='dbs3')
    records = json.load(data)
    data.close()
    dbsdata = {}
    for row in records:
        if  row['dataset'] not in dbsdata:
            dbsdata[row['dataset']] = \
                dict(era=row['acquisition_era_name'],
                        tier=row['data_tier_name'], status='VALID')
    for row in phedex_info(urls, dbsdata):
        yield row
Example 12
def worker_helper(url, query, table='runsummary'):
    """
    Query RunRegistry service, see documentation at
    https://twiki.cern.ch/twiki/bin/viewauth/CMS/DqmRrApi
    url=http://runregistry.web.cern.ch/runregistry/
    """
    workspace = 'GLOBAL'
    template = 'json'
    if  table == 'runsummary':
        columns = ['number', 'startTime', 'stopTime', 'triggers',
                   'runClassName', 'runStopReason', 'bfield', 'gtKey',
                   'l1Menu', 'hltKeyDescription', 'lhcFill', 'lhcEnergy',
                   'runCreated', 'modified', 'lsCount', 'lsRanges']
    elif table == 'runlumis':
        columns = ['sectionFrom', 'sectionTo', 'runNumber']
    else:
        raise ValueError('Unsupported table: %s' % table)
    sdata = {'filter':query}
    path = 'api/GLOBAL/%s/%s/%s/none/data' \
                % (table, template, urllib.quote(','.join(columns)))
    callurl = os.path.join(url, path)
    result, _ = getdata(callurl, sdata, post=True)
    record = json.load(result)
    result.close()
    notations = {'lsRanges':'lumi_section_ranges',
            'number':'run_number', 'runCreated':'create_time',
            'runNumber': 'run_number',
            'stopTime': 'end_time', 'startTime': 'start_time',
            'lsCount': 'lumi_sections', 'runStopReason': 'stop_reason',
            'hltKeyDescription': 'hltkey', 'gtKey': 'gtkey',
            'lhcEnergy': 'beam_e', 'l1Menu': 'l1key',
            'modified': 'modify_time', 'runClassName': 'group_name'}
    for rec in record:
        for key, val in rec.items():
            if  key in notations:
                rec[notations[key]] = val
                del rec[key]
        if  table == 'runsummary':
            yield dict(run=rec)
        elif table == 'runlumis':
            if  'sectionTo' in rec and 'sectionFrom' in rec:
                rec['number'] = range(rec.pop('sectionFrom'),
                                      rec.pop('sectionTo') + 1)
            yield dict(lumi=rec)
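
For the runlumis table the [sectionFrom, sectionTo] pair is expanded into an explicit list of lumi sections; the same transform on a bare record:

    rec = {'sectionFrom': 5, 'sectionTo': 8, 'runNumber': 176304}
    rec['number'] = range(rec.pop('sectionFrom'), rec.pop('sectionTo') + 1)
    print(rec)  # {'runNumber': 176304, 'number': [5, 6, 7, 8]} (key order may vary)
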
Example 13
def json_parser(source, logger=None):
    """
    JSON parser based on json module. It accepts either source
    descriptor with .read()-supported file-like object or
    data as a string object.
    """
    if  isinstance(source, InstanceType) or isinstance(source, file):
        # got data descriptor
        try:
            jsondict = json.load(source)
        except Exception as exc:
            print_exc(exc)
            source.close()
            raise
        source.close()
    else:
        data = source
        # to prevent unicode/ascii errors like
        # UnicodeDecodeError: 'utf8' codec can't decode byte 0xbf in position
        if  isinstance(data, basestring):
            data = unicode(data, errors='ignore')
            res  = data.replace('null', '"null"')
        elif isinstance(data, object) and hasattr(data, 'read'): # StringIO
            res = data.read()
        else:
            res  = data
        try:
            jsondict = json.loads(res)
        except Exception:
            msg  = "json_parser, WARNING: fail to JSON'ify data:"
            msg += "\n%s\ndata type %s" % (res, type(res))
            if  logger:
                logger.warning(msg)
            else:
                print(msg)
            jsondict = eval(res, { "__builtins__": None }, {})
    yield jsondict
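
The string branch quotes bare null tokens before parsing, so payloads containing JSON nulls survive as the string 'null'; a minimal check:

    for rec in json_parser('{"site": null}'):
        print(rec)  # {u'site': u'null'}
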
Example 14
def sitedb_parser(source):
    """SiteDB parser"""
    if  isinstance(source, str) or isinstance(source, unicode):
        data = json.loads(source)
    elif isinstance(source, InstanceType) or isinstance(source, file):
        # got data descriptor
        try:
            data = json.load(source)
        except Exception as exc:
            print_exc(exc)
            source.close()
            raise
        source.close()
    else:
        data = source
    if  not isinstance(data, dict):
        raise Exception('Wrong data type, %s' % type(data))
    if  'desc' in data:
        columns = data['desc']['columns']
        for row in data['result']:
            yield rowdict(columns, row)
    else:
        for row in data['result']:
            yield row
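
SiteDB responses carry a 'desc' block naming the columns of each result row; rowdict (a project helper not shown here, presumably zipping columns with values) rebuilds dicts from them. An illustrative payload:

    payload = {'desc': {'columns': ['username', 'email']},
               'result': [['alice', 'alice@cern.ch']]}
    for row in sitedb_parser(payload):
        print(row)  # e.g. {'username': 'alice', 'email': 'alice@cern.ch'}
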
Example 15
def dataset_info(urls, datasetdict, verbose=0):
    """
    Request blockReplicas information from Phedex for a given
    dataset or a list of datasets (a POST request is used in the latter case).
    Update MongoDB with aggregated information about dataset:
    site, size, nfiles, nblocks.
    """
    url = urls.get('phedex')
    params = {'dataset': [d for d in datasetdict.keys()]}
    headers = {'Accept':'application/json;text/json'}
    data, _ = getdata(url, params, headers, post=True, verbose=verbose)
    jsondict = json.load(data)
    data.close()
    for row in jsondict['phedex']['block']:
        name = row['name'].split('#')[0]
        for rep in row['replica']:
            rec = dict(name=name, 
                        nfiles=row['files'],
                        size=row['bytes'],
                        site=rep['node'], 
                        se=rep['se'],
                        custodial=rep['custodial'])
            rec.update(datasetdict[name])
            yield rec
Example 16
def datasets_dbs3(urls, verbose=0):
    """DBS3 implementation of datasets function"""
    headers = {'Accept':'application/json;text/json'}
    url     = urls.get('dbs')
    params  = {'detail':'True', 'dataset_access_type':'PRODUCTION'}
    ckey, cert = get_key_cert()
    data, _ = getdata(url, params, headers, verbose=verbose,
                ckey=ckey, cert=cert, doseq=False)
    records = json.load(data)
    data.close()
    data = {}
    size = 10 # size for POST request to Phedex
    for row in records:
        if  row['dataset'] not in data:
            data[row['dataset']] = \
            dict(era=row['acquisition_era_name'], tier=row['data_tier_name'])
        if  len(data) > size:
            for rec in dataset_info(urls, data):
                yield rec
            data = {}
    if  data:
        for rec in dataset_info(urls, data):
            yield rec
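
The flush-every-size-entries idiom above, isolated as a standalone sketch (names are illustrative), keeps each downstream POST to Phedex small:

    def batched(pairs, size=10):
        "Yield dicts of roughly size entries, mirroring the flush logic above"
        batch = {}
        for key, val in pairs:
            batch.setdefault(key, val)
            if len(batch) > size:  # flush once the batch exceeds size
                yield batch
                batch = {}
        if batch:  # flush the remainder
            yield batch
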