Example #1
def dbs_dataset4site_release(dbs_url, release):
    "Get dataset for given site and release"
    expire = 600 # set some expire since we're not going to use it
    if  which_dbs(dbs_url) == 'dbs':
        # DBS2 branch; the DBS3 branch below uses the datasets API and passes the release there
        query = 'find dataset where release=%s' % release
        dbs_args = {'api':'executeQuery', 'apiversion': 'DBS_2_0_9', \
                    'query':query}
        headers = {'Accept': 'text/xml'}
        source, expire = \
            getdata(dbs_url, dbs_args, headers, expire, ckey=CKEY, cert=CERT,
                    system='dbs')
        prim_key = 'dataset'
        for row in qlxml_parser(source, prim_key):
            if  'dataset' in row:
                dataset = row['dataset']['dataset']
                yield dataset
            elif 'error' in row:
                err = row.get('reason', None)
                err = err if err else row['error']
                yield 'DBS error: %s' % err
    else:
        # we call datasets?release=release to get list of datasets
        dbs_url += '/datasets'
        dbs_args = \
        {'release_version': release, 'dataset_access_type':'VALID'}
        headers = {'Accept': 'application/json;text/json'}
        source, expire = \
            getdata(dbs_url, dbs_args, headers, expire, ckey=CKEY, cert=CERT,
                    system='dbs3')
        for rec in json_parser(source, None):
            for row in rec:
                yield row['dataset']
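For orientation, the two branches above build request shapes like the following. A minimal, self-contained sketch with illustrative names (build_dataset4release_args is not a DAS function):

def build_dataset4release_args(backend, release):
    "Build (path suffix, params, headers) for a dataset-by-release look-up"
    if backend == 'dbs':  # DBS2 goes through the query-language API
        query = 'find dataset where release=%s' % release
        params = {'api': 'executeQuery', 'apiversion': 'DBS_2_0_9',
                  'query': query}
        return '', params, {'Accept': 'text/xml'}
    # DBS3 exposes a REST datasets API instead
    params = {'release_version': release, 'dataset_access_type': 'VALID'}
    return '/datasets', params, {'Accept': 'application/json;text/json'}

print(build_dataset4release_args('dbs3', 'CMSSW_5_3_2'))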
Example #2
 def getdata(self, url, params, expire, headers=None, post=None):
     """URL call wrapper"""
     if  url.find('https:') != -1:
         return getdata(url, params, headers, expire, post,
             self.error_expire, self.verbose, self.ckey, self.cert,
             system=self.name)
     else:
         return getdata(url, params, headers, expire, post,
             self.error_expire, self.verbose, system=self.name)
Example #3
def datasets_dbs2(urls, verbose=0):
    """DBS2 implementation of datasets function"""
    headers = {'Accept':'application/xml;text/xml'}
    records = []
    url     = urls.get('dbs')
    query   = 'find dataset,dataset.tier,dataset.era where dataset.status like VALID*'
    params  = {'api':'executeQuery', 'apiversion':'DBS_2_0_9', 'query':query}
    stream, _ = getdata(url, params, headers, verbose=verbose)
    records = [r for r in qlxml_parser(stream, 'dataset')]
    stream.close()
    data = {}
    size = 10 # size for POST request to Phedex
    for row in records:
        dataset = row['dataset']
    if  dataset['dataset'] not in data:
        data[dataset['dataset']] = \
        dict(era=dataset['dataset.era'], tier=dataset['dataset.tier'])
    if  len(data) > size:
            for rec in dataset_info(urls, data):
                yield rec
            data = {}
    if  data:
        for rec in dataset_info(urls, data):
            yield rec
    del records
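The flush-when-full pattern above (accumulate into `data`, emit once it exceeds `size`, then flush the remainder) generalizes to any batched request. A standalone sketch, with `batched` as an illustrative name:

def batched(items, size):
    "Yield lists of at most `size` items from any iterable"
    chunk = []
    for item in items:
        chunk.append(item)
        if len(chunk) >= size:
            yield chunk
            chunk = []
    if chunk:  # flush the remainder, like the trailing `if data:` above
        yield chunk

for chunk in batched(range(25), 10):
    print(chunk)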
Example #4
def phedex_files(phedex_url, kwds):
    "Get file information from Phedex"
    params = dict(kwds)  # parameters to be sent to Phedex
    site = kwds.get('site', None)
    if site and phedex_node_pattern.match(site):
        if not site.endswith('*'):
            # this allows look-up of site names given w/o the _Buffer or _MSS suffix
            site += '*'
        params.update({'node': site})
        params.pop('site')
    elif site and se_pattern.match(site):
        params.update({'se': site})
        params.pop('site')
    else:
        return
    expire = 600  # set some expire since we're not going to use it
    headers = {'Accept': 'text/xml'}
    source, expire = \
        getdata(phedex_url, params, headers, expire, ckey=CKEY, cert=CERT,
                system='phedex')
    tags = 'block.file.name'
    prim_key = 'block'
    for rec in xml_parser(source, prim_key, tags):
        ddict = DotDict(rec)
        files = ddict.get('block.file')
        if not isinstance(files, list):
            files = [files]
        for row in files:
            yield row['name']
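The isinstance check guards against XML parsers that return a single dict when a block holds one file but a list when it holds several. The same idiom as a tiny helper (ensure_list is a hypothetical name, not part of DAS):

def ensure_list(value):
    "Wrap a single record in a list; pass lists through unchanged"
    return value if isinstance(value, list) else [value]

print(ensure_list({'name': '/store/data/f.root'}))
print(ensure_list([{'name': 'a.root'}, {'name': 'b.root'}]))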
Example #5
def findReqMgrIds(dataset, base='https://cmsweb.cern.ch', verbose=False):
    """
    Find ReqMgrIds for a given dataset. This is a rather complex procedure in
    CMS. We need to query the ReqMgr data-service cache and find workflow ids
    by output dataset name. The ReqMgr returns either a document with ids used
    by MCM (i.e. ProcConfigCacheID, ConfigCacheID, SkimConfigCacheID) or we
    can take the id of a request which bypasses MCM. For references see these
    discussions:
    https://github.com/dmwm/DAS/issues/4045
    https://hypernews.cern.ch/HyperNews/CMS/get/dmDevelopment/1501/1/1/1/1.html
    """
    params = {'key': '"%s"' % dataset, 'include_docs':'true'}
    url = "%s/couchdb/reqmgr_workload_cache/_design/ReqMgr/_view/byoutputdataset" \
        % base
    headers = {'Accept': 'application/json;text/json'}
    expire = 600 # dummy number, we don't need it here
    source, expire = \
        getdata(url, params, headers, expire, ckey=CKEY, cert=CERT,
                verbose=verbose)
    ids = []
    for row in json_parser(source, None):
        for rec in row.get('rows', []):
            doc = rec['doc']
            if  'ProcConfigCacheID' in doc:
                ids.append(doc['ProcConfigCacheID'])
            elif 'ConfigCacheID' in doc:
                ids.append(doc['ConfigCacheID'])
            elif 'SkimConfigCacheID' in doc:
                ids.append(doc['SkimConfigCacheID'])
            else:
                if  'id' in rec and 'key' in rec and rec['key'] == dataset:
                    ids.append(rec['id'])
    return ids
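The extra double quotes around the dataset name are needed because CouchDB view keys are JSON values. A sketch of the same construction using json.dumps (couch_view_params is an illustrative name):

import json

def couch_view_params(key, include_docs=True):
    "Build CouchDB view parameters; view keys must be JSON-encoded"
    params = {'key': json.dumps(key)}  # e.g. '"/a/b/RECO"', quotes included
    if include_docs:
        params['include_docs'] = 'true'
    return params

print(couch_view_params('/a/b/RECO'))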
Example #6
 def getdata(self, url, params, expire, headers=None, post=None):
     """URL call wrapper"""
     if not headers:
         headers = {"Accept": "application/json"}
      # MCM uses a REST API
     if "dataset" in params:
         url = "%s%s" % (url, params.get("dataset"))
     elif "mcm" in params:
         url = "%s/%s" % (url, params.get("mcm"))
     else:
         return {}
     params = {}
     result = getdata(
         url,
         params,
         headers,
         expire,
         post,
         self.error_expire,
         self.verbose,
         self.ckey,
         self.cert,
         doseq=False,
         system=self.name,
     )
     return result
Example #7
 def getdata(self, url, params, expire, headers=None, post=None):
     """URL call wrapper"""
     if not headers:
         headers = {'Accept': 'application/json'}
      # MCM uses a REST API
     if 'dataset' in params:
         url = '%s%s' % (url, params.get('dataset'))
     elif 'mcm' in params:
         url = '%s/%s' % (url, params.get('mcm'))
     elif 'prepid' in params:
         url = '%s/%s' % (url, params.get('prepid'))
     else:
         return {}
     params = {}
     result = getdata(url,
                      params,
                      headers,
                      expire,
                      post,
                      self.error_expire,
                      self.verbose,
                      self.ckey,
                      self.cert,
                      doseq=False,
                      system=self.name)
     return result
Example #8
def dataset_info(urls, datasetdict, verbose=0):
    """
    Request blockReplicas information from Phedex for a given
    dataset or a list of datasets (use a POST request in the latter case).
    Update MongoDB with aggregated information about dataset:
    site, size, nfiles, nblocks.
    """
    url      = urls.get('phedex') + '/blockReplicas'
    params   = {'dataset': list(datasetdict.keys())}
    headers  = {'Accept':'application/json;text/json'}
    data, _  = getdata(url, params, headers, post=True, \
            ckey=CKEY, cert=CERT, verbose=verbose, system='dbs_phedex')
    if  isinstance(data, basestring): # no response
        dastimestamp('DBS_PHEDEX ERROR: %s' % data)
        return
    jsondict = json.load(data)
    data.close()
    for row in jsondict['phedex']['block']:
        dataset = row['name'].split('#')[0]
        for rep in row['replica']:
            rec = dict(dataset=dataset,
                        nfiles=row['files'],
                        size=row['bytes'],
                        site=rep['node'],
                        se=rep['se'],
                        custodial=rep['custodial'])
            rec.update(datasetdict[dataset])
            yield rec
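The dataset key is recovered from the Phedex block name, which embeds the dataset before the '#' separator; for example (the block name here is made up):

block = '/Prim/Proc-v1/AOD#abcd1234-ef56'
dataset = block.split('#')[0]
print(dataset)  # prints /Prim/Proc-v1/AOD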
Example #9
def dbs_find(entity, url, kwds):
    "Find files for given set of parameters"
    if  entity not in ['run', 'file', 'block']:
        msg = 'Unsupported entity key=%s' % entity
        raise Exception(msg)
    expire  = 600
    dataset = kwds.get('dataset', None)
    block   = kwds.get('block', None)
    lfn     = kwds.get('lfn', None)
    runs    = kwds.get('runs', [])
    if  not (dataset or block or lfn):
        return
    query = 'find %s' % entity
    if  dataset:
        query += ' where dataset=%s' % dataset
    elif block:
        query += ' where block=%s' % block
    elif lfn:
        query += ' where file=%s' % lfn
    if  runs:
        rcond   = ' or '.join(['run=%s' % r for r in runs])
        query  += ' and (%s)' % rcond
    params  = {'api':'executeQuery', 'apiversion':'DBS_2_0_9', 'query':query}
    headers = {'Accept': 'text/xml'}
    source, expire = \
        getdata(url, params, headers, expire, ckey=CKEY, cert=CERT)
    pkey    = entity
    for row in qlxml_parser(source, pkey):
        val = row[entity][entity]
        yield val
Example #10
def phedex_files(phedex_url, kwds):
    "Get file information from Phedex"
    params = dict(kwds) # parameters to be sent to Phedex
    site = kwds.get('site', None)
    if  site and phedex_node_pattern.match(site):
        if  not site.endswith('*'):
        # this allows look-up of site names given w/o the _Buffer or _MSS suffix
            site += '*'
        params.update({'node': site})
        params.pop('site')
    elif site and se_pattern.match(site):
        params.update({'se': site})
        params.pop('site')
    else:
        return
    expire = 600 # set some expire since we're not going to use it
    headers = {'Accept': 'text/xml'}
    source, expire = \
        getdata(phedex_url, params, headers, expire, ckey=CKEY, cert=CERT,
                system='phedex')
    tags = 'block.file.name'
    prim_key = 'block'
    for rec in xml_parser(source, prim_key, tags):
        ddict = DotDict(rec)
        files = ddict.get('block.file')
        if  not isinstance(files, list):
            files = [files]
        for row in files:
            yield row['name']
Example #11
 def getdata_helper(self, url, params, expire, headers=None, post=None):
     "Helper function to get data from SiteDB or local cache"
     cname = url.split('/')[-1].replace('-', '_')
     conn  = db_connection(self.dburi)
     col   = conn[self.name][cname]
     local = find_one(col, {'expire':{'$gt':expire_timestamp(time.time())}})
     data  = None
     if  local:
         msg = 'SiteDBService reads from %s.%s' % (self.name, cname)
         self.logger.info(msg)
         try: # get data from local cache
             data = [r for r in col.find() if 'expire' not in r][0]
             del data['_id']
         except Exception as exc:
             print_exc(exc)
             data = {}
     if  not data or not local:
         headers = {'Accept':'application/json'}
         datastream, expire = getdata(\
                 url, params, headers, expire, post,
                 self.error_expire, self.verbose, self.ckey, self.cert,
                 system=self.name)
         try: # read data and write it to local cache
             data = json.load(datastream)
             datastream.close()
             col.remove()
             col.insert(data)
             col.insert({'expire':expire_timestamp(expire)})
         except Exception as exc:
             print_exc(exc)
     return data, expire
Example #12
 def getdata(self, url, params, expire, headers=None, post=None):
     """URL call wrapper"""
     if  url[-1] == '/':
         url = url[:-1]
     return getdata(url, params, headers, expire, post,
             self.error_expire, self.verbose, self.ckey, self.cert,
             system=self.name)
Example #13
def run_lumis_dbs(url, dataset, ckey, cert):
    "Retrive list of run/lumis from DBS for a given dataset"
    res      = {} # output result
    api_url  = url + '/blocks'
    params   = {'dataset': dataset}
    data, _  = getdata(api_url, params, ckey=ckey, cert=cert, system='combined')
    for row in json.load(data):
        api_url = url + '/filelumis'
        params = {'block_name': row['block_name']}
        data, _  = \
            getdata(api_url, params, ckey=ckey, cert=cert, system='combined')
        for rec in json.load(data):
            run  = rec['run_num']
            lumi = rec['lumi_section_num']
            res.setdefault(run, []).append(lumi)
    return res
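The setdefault idiom above is what builds the run-to-lumis mapping; in isolation:

res = {}
for run, lumi in [(1, 10), (1, 11), (2, 5)]:
    res.setdefault(run, []).append(lumi)
print(res)  # {1: [10, 11], 2: [5]}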
Example #14
 def getdata_helper(self, url, params, expire, headers=None, post=None):
     "Helper function to get data from SiteDB or local cache"
     cname = url.split('/')[-1].replace('-', '_')
     col   = self.localcache.conn[self.name][cname]
     local = col.find_one({'expire':{'$gt':expire_timestamp(time.time())}})
     data  = None
     if  local:
         msg = 'SiteDBService reads from %s.%s' % (self.name, cname)
         self.logger.info(msg)
         try: # get data from local cache
              data = [r for r in col.find() if 'expire' not in r][0]
             del data['_id']
         except Exception as exc:
             print_exc(exc)
             data = {}
     if  not data or not local:
         headers = {'Accept':'application/json'}
         datastream, expire = getdata(\
                 url, params, headers, expire, post,
                 self.error_expire, self.verbose, self.ckey, self.cert,
                 system=self.name)
         try: # read data and write it to local cache
             data = json.load(datastream)
             datastream.close()
             col.remove()
             col.insert(data)
             col.insert({'expire':expire_timestamp(expire)})
         except Exception as exc:
             print_exc(exc)
     return data, expire
Example #15
def runs_dbs(url, dataset, ckey, cert):
    "Retrive list of run/lumis from DBS2 for a given dataset"
    api_url = url + '/runs'
    params = {'dataset': dataset}
    data, _ = getdata(api_url, params, ckey=ckey, cert=cert, system='combined')
    for row in json.load(data):
        run = row['run']['run_num']
        yield run
Example #16
File: dbs_rr.py Project: dmwm/DAS
def runs_dbs(url, dataset, ckey, cert):
    "Retrive list of run/lumis from DBS2 for a given dataset"
    api_url  = url + '/runs'
    params   = {'dataset': dataset}
    data, _  = getdata(api_url, params, ckey=ckey, cert=cert, system='combined')
    for row in json.load(data):
        run  = row['run']['run_num']
        yield run
Example #17
File: dbs_rr.py Project: ktf/DAS
def runs_dbs2(url, dataset, ckey, cert):
    "Retrive list of run from DBS2 for a given dataset"
    query    = "find run where dataset=%s" % dataset
    params   = dict(api='executeQuery', apiversion='DBS_2_0_9', query=query)
    data, _  = getdata(url, params, ckey=ckey, cert=cert, system='combined')
    prim_key = 'run'
    for row in qlxml_parser(data, prim_key):
        run  = row['run']['run']
        yield run
Example #18
def site4dataset(dbs_url, phedex_api, args, expire):
    "Yield site information about given dataset"
    # DBS part
    dataset = args['dataset']
    try:
        totblocks, totfiles = dataset_summary(dbs_url, dataset)
    except Exception as err:
        error  = str(err)
        reason = "Can't find #block, #files info in DBS for dataset=%s" \
                % dataset
        yield {'site': {'error': error, 'reason': reason}}
        return
    # Phedex part
    phedex_args = {'dataset':args['dataset']}
    headers = {'Accept': 'text/xml'}
    source, expire = \
        getdata(phedex_api, phedex_args, headers, expire, post=True,
                system='phedex')
    prim_key = 'block'
    tags = 'block.replica.node'
    site_info = {}
    for rec in xml_parser(source, prim_key, tags):
        ddict = DotDict(rec)
        replicas = ddict.get('block.replica')
        if  not isinstance(replicas, list):
            replicas = [replicas]
        for row in replicas:
            if  not row or 'node' not in row:
                continue
            node = row['node']
            files = int(row['files'])
            complete = 1 if row['complete'] == 'y' else 0
            if  node in site_info:
                files = site_info[node]['files'] + files
                nblks  = site_info[node]['blocks'] + 1
                bc_val = site_info[node]['blocks_complete']
                b_complete = bc_val+1 if complete else bc_val
            else:
                b_complete = 1 if complete else 0
                nblks = 1
            site_info[node] = {'files': files, 'blocks': nblks,
                        'blocks_complete': b_complete}
    row = {}
    for key, val in site_info.iteritems():
        if  totfiles:
            nfiles = '%5.2f%%' % (100*float(val['files'])/totfiles)
        else:
            nfiles = 'N/A'
        if  totblocks:
            nblks  = '%5.2f%%' % (100*float(val['blocks'])/totblocks)
        else:
            nblks = 'N/A'
        ratio = float(val['blocks_complete'])/val['blocks']
        b_completion = '%5.2f%%' % (100*ratio)
        row = {'site':{'name':key, 'dataset_fraction': nfiles,
            'block_fraction': nblks, 'block_completion': b_completion}}
        yield row
Example #19
 def getdata(self, url, params, expire, headers=None, post=None):
     """URL call wrapper"""
     if  url[-1] == '/':
         url = url[:-1]
      for _key, val in params.iteritems():
          url = '/'.join([url, val])
     params = {}
     return getdata(url, params, headers, expire, post,
             self.error_expire, self.verbose, self.ckey, self.cert,
             system=self.name)
Example #20
 def getdata(self, url, params, expire, headers=None, post=None):
     """URL call wrapper"""
     if  not headers:
         headers =  {'Accept': 'application/json' } # DBS3 always needs that
     if  url.find('datasetlist') != -1:
         post = True
         headers['Content-type'] = 'application/json'
     return getdata(url, params, headers, expire, post,
             self.error_expire, self.verbose, self.ckey, self.cert,
             doseq=False, system=self.name)
Example #21
def run_lumis_dbs2(url, dataset, ckey, cert):
    "Retrive list of run/lumis from DBS2 for a given dataset"
    query    = "find run, lumi where dataset=%s" % dataset
    params   = dict(api='executeQuery', apiversion='DBS_2_0_9', query=query)
    data, _  = getdata(url, params, ckey=ckey, cert=cert, system='combined')
    prim_key = 'run'
    res = {} # output result
    for row in qlxml_parser(data, prim_key):
        run  = row['run']['run']
        lumi = row['run']['lumi']
        res.setdefault(run, []).append(lumi)
    return res
Example #22
def worker_helper(url, query, table='runsummary'):
    """
    Query RunRegistry service, see documentation at
    https://twiki.cern.ch/twiki/bin/viewauth/CMS/DqmRrApi
    url=http://runregistry.web.cern.ch/runregistry/
    """
    workspace = 'GLOBAL'
    template = 'json'
    if table == 'runsummary':
        columns = [
            'number', 'startTime', 'stopTime', 'triggers', 'runClassName',
            'runStopReason', 'bfield', 'gtKey', 'l1Menu', 'hltKeyDescription',
            'lhcFill', 'lhcEnergy', 'runCreated', 'modified', 'lsCount',
            'lsRanges'
        ]
    elif table == 'runlumis':
        columns = ['sectionFrom', 'sectionTo', 'runNumber']
    sdata = {'filter': query}
    path = 'api/%s/%s/%s/%s/none/data' \
                % (workspace, table, template, urllib.quote(','.join(columns)))
    callurl = os.path.join(url, path)
    result, _ = getdata(callurl, sdata, post=True)
    record = json.load(result)
    result.close()
    notations = {
        'lsRanges': 'lumi_section_ranges',
        'number': 'run_number',
        'runCreated': 'create_time',
        'runNumber': 'run_number',
        'stopTime': 'end_time',
        'startTime': 'start_time',
        'lsCount': 'lumi_sections',
        'runStopReason': 'stop_reason',
        'hltKeyDescription': 'hltkey',
        'gtKey': 'gtkey',
        'lhcEnergy': 'beam_e',
        'l1Menu': 'l1key',
        'modified': 'modify_time',
        'runClassName': 'group_name'
    }
    for rec in record:
        for key, val in rec.items():
            if key in notations:
                rec[notations[key]] = val
                del rec[key]
        if table == 'runsummary':
            yield dict(run=rec)
        elif table == 'runlumis':
            if 'sectionTo' in rec and 'sectionFrom' in rec:
                rec['number'] = [i for i in \
                        range(rec.pop('sectionFrom'), rec.pop('sectionTo')+1)]
            yield dict(lumi=rec)
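Renaming keys while looping over rec.items() is safe on Python 2, where items() returns a list. A sketch that also works on Python 3 by snapshotting the items first (remap_keys is an illustrative name):

def remap_keys(rec, notations):
    "Rename dict keys in place according to a notations mapping"
    for key, val in list(rec.items()):  # snapshot, safe to mutate rec
        if key in notations:
            rec[notations[key]] = val
            del rec[key]
    return rec

print(remap_keys({'number': 176201, 'lsCount': 56},
                 {'number': 'run_number', 'lsCount': 'lumi_sections'}))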
Example #23
def dataset_summary(dbs_url, dataset):
    """
    Invoke DBS2/DBS3 call to get information about the total
    number of files/blocks in a given dataset.
    """
    expire = 600 # set some expire since we're not going to use it
    if  which_dbs(dbs_url) == 'dbs':
        # DBS2 call
        query  = 'find count(file.name), count(block.name)'
        query += ' where dataset=%s and dataset.status=*' % dataset
        dbs_args = {'api':'executeQuery', 'apiversion': 'DBS_2_0_9', \
                    'query':query}
        headers = {'Accept': 'text/xml'}
        source, expire = \
            getdata(dbs_url, dbs_args, headers, expire, ckey=CKEY, cert=CERT,
                    system='dbs')
        prim_key = 'dataset'
        for row in qlxml_parser(source, prim_key):
            if  'dataset' in row:
                totfiles  = row['dataset']['count_file.name']
                totblocks = row['dataset']['count_block.name']
                return totblocks, totfiles
            elif 'error' in row:
                raise Exception(row.get('reason', row['error']))
        # if we're here we didn't find a dataset, throw the error
        msg = 'empty set'
        raise Exception(msg)
    else:
        # we call filesummaries?dataset=dataset to get number of files/blks
        dbs_url += '/filesummaries'
        dbs_args = {'dataset': dataset}
        headers = {'Accept': 'application/json;text/json'}
        source, expire = \
            getdata(dbs_url, dbs_args, headers, expire, ckey=CKEY, cert=CERT,
                    system='dbs3')
        for row in json_parser(source, None):
            totfiles  = row[0]['num_file']
            totblocks = row[0]['num_block']
            return totblocks, totfiles
Example #24
 def getdata(self, url, params, expire, headers=None, post=None):
     """URL call wrapper"""
     if url.find('https:') != -1:
         return getdata(url,
                        params,
                        headers,
                        expire,
                        post,
                        self.error_expire,
                        self.verbose,
                        self.ckey,
                        self.cert,
                        system=self.name)
     else:
         return getdata(url,
                        params,
                        headers,
                        expire,
                        post,
                        self.error_expire,
                        self.verbose,
                        system=self.name)
Example #25
 def getdata(self, url, params, expire, headers=None, post=None):
     """URL call wrapper"""
     if url[-1] == '/':
         url = url[:-1]
     return getdata(url,
                    params,
                    headers,
                    expire,
                    post,
                    self.error_expire,
                    self.verbose,
                    self.ckey,
                    self.cert,
                    system=self.name)
Example #26
def dbs_dataset4release_parent(dbs_url, release, parent=None):
    "Get dataset for given release and optional parent dataset"
    expire = 600 # set some expire since we're not going to use it
    # we call datasets?release=release to get list of datasets
    dbs_url += '/datasets'
    dbs_args = \
    {'release_version': release, 'dataset_access_type':'VALID'}
    if  parent:
        dbs_args.update({'parent_dataset': parent})
    headers = {'Accept': 'application/json;text/json'}
    source, expire = \
        getdata(dbs_url, dbs_args, headers, expire, ckey=CKEY, cert=CERT,
                system='dbs3')
    for rec in json_parser(source, None):
        for row in rec:
            yield row['dataset']
Example #27
def dbs_dataset4release_parent(dbs_url, release, parent=None):
    "Get dataset for given release and optional parent dataset"
    expire = 600  # set some expire since we're not going to use it
    # we call datasets?release=release to get list of datasets
    dbs_url += '/datasets'
    dbs_args = \
    {'release_version': release, 'dataset_access_type':'VALID'}
    if parent:
        dbs_args.update({'parent_dataset': parent})
    headers = {'Accept': 'application/json;text/json'}
    source, expire = \
        getdata(dbs_url, dbs_args, headers, expire, ckey=CKEY, cert=CERT,
                system='dbs3')
    for rec in json_parser(source, None):
        for row in rec:
            yield row['dataset']
Example #28
 def site_info(self, phedex_url, site):
     "Return Phedex site info about given site (rely on local cache)"
      if  abs(self.sites.get('tstamp', 0) - time.time()) > self.thr \
             or site not in self.sites:
         # need to update the cache
         # use Phedex API https://cmsweb.cern.ch/phedex/datasvc/json/prod/nodes
         expire = self.thr
         args = {}
         api = phedex_url + '/nodes'
         headers = {'Accept': 'application/json;text/json'}
         source, expire = \
             getdata(api, args, headers, expire, system='phedex')
         self.sites['tstamp'] = time.time()
         for rec in json_parser(source, None):
             for row in rec['phedex']['node']:
                 self.sites[row['name']] = row['kind']
     return self.sites.get(site, 'NA')
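The refresh test keys on a timestamp stored alongside the cached site records. A minimal standalone version of the same check (is_stale and the threshold are illustrative, and a missing timestamp defaults to 0 so the cache counts as stale):

import time

def is_stale(cache, thr, key='tstamp'):
    "True if the cached timestamp is older than `thr` seconds or missing"
    return abs(cache.get(key, 0) - time.time()) > thr

cache = {'tstamp': time.time() - 7200}
print(is_stale(cache, thr=3600))  # True, cache needs a refresh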
Example #29
 def getdata(self, url, params, expire, headers=None, post=None):
     """URL call wrapper"""
     if url[-1] == '/':
         url = url[:-1]
      for _key, val in params.items():
          url = '/'.join([url, val])
     params = {}
     return getdata(url,
                    params,
                    headers,
                    expire,
                    post,
                    self.error_expire,
                    self.verbose,
                    self.ckey,
                    self.cert,
                    system=self.name)
Example #30
 def site_info(self, phedex_url, site):
     "Return Phedex site info about given site (rely on local cache)"
      if  abs(self.sites.get('tstamp', 0) - time.time()) > self.thr \
             or site not in self.sites:
         # need to update the cache
         # use Phedex API https://cmsweb.cern.ch/phedex/datasvc/json/prod/nodes
         expire = self.thr
         args = {}
         api = phedex_url + '/nodes'
         headers = {'Accept': 'application/json;text/json'}
         source, expire = \
             getdata(api, args, headers, expire, system='phedex')
         self.sites['tstamp'] = time.time()
         for rec in json_parser(source, None):
             for row in rec['phedex']['node']:
                 self.sites[row['name']] = row['kind']
     return self.sites.get(site, 'NA')
Example #31
def dataset_summary(dbs_url, dataset):
    """
    Invoke DBS2/DBS3 call to get information about the total
    number of files/blocks in a given dataset.
    """
    expire = 600 # set some expire since we're not going to use it
    # we call filesummaries?dataset=dataset to get number of files/blks
    dbs_url += '/filesummaries'
    dbs_args = {'dataset': dataset, 'validFileOnly': 1}
    headers = {'Accept': 'application/json;text/json'}
    source, expire = \
        getdata(dbs_url, dbs_args, headers, expire, ckey=CKEY, cert=CERT,
                system='dbs3')
    for row in json_parser(source, None):
        totfiles  = row[0]['num_file']
        totblocks = row[0]['num_block']
        return totblocks, totfiles
Example #32
 def getdata(self, url, params, expire, headers=None, post=None):
     """URL call wrapper"""
     if not headers:
         headers = {"Accept": "application/json"}  # DBS3 always needs that
     return getdata(
         url,
         params,
         headers,
         expire,
         post,
         self.error_expire,
         self.verbose,
         self.ckey,
         self.cert,
         doseq=False,
         system=self.name,
     )
Example #33
def dataset_summary(dbs_url, dataset):
    """
    Invoke DBS2/DBS3 call to get information about the total
    number of files/blocks in a given dataset.
    """
    expire = 600  # set some expire since we're not going to use it
    # we call filesummaries?dataset=dataset to get number of files/blks
    dbs_url += '/filesummaries'
    dbs_args = {'dataset': dataset, 'validFileOnly': 1}
    headers = {'Accept': 'application/json;text/json'}
    source, expire = \
        getdata(dbs_url, dbs_args, headers, expire, ckey=CKEY, cert=CERT,
                system='dbs3')
    for row in json_parser(source, None):
        totfiles = row[0]['num_file']
        totblocks = row[0]['num_block']
        return totblocks, totfiles
Example #34
def datasets_dbs(urls, verbose=0):
    """DBS3 implementation of datasets function"""
    headers = {'Accept':'application/json;text/json'}
    records = []
    url     = urls.get('dbs3') + '/datasets'
    params  = {'detail':'True', 'dataset_access_type':'VALID'}
    data, _ = getdata(url, params, headers, post=False, verbose=verbose,
                ckey=CKEY, cert=CERT, doseq=False, system='dbs3')
    records = json.load(data)
    data.close()
    dbsdata = {}
    for row in records:
        if  row['dataset'] not in dbsdata:
            dbsdata[row['dataset']] = \
                dict(era=row['acquisition_era_name'],
                        tier=row['data_tier_name'], status='VALID')
    for row in phedex_info(urls, dbsdata):
        yield row
Example #35
 def getdata(self, url, params, expire, headers=None, post=None):
     """URL call wrapper"""
     if  not headers:
         headers =  {'Accept': 'application/json' }
      # MCM uses a REST API
     if  'dataset' in params:
         url  = '%s%s' % (url, params.get('dataset'))
     elif 'mcm' in params:
         url = '%s/%s' % (url, params.get('mcm'))
     elif 'prepid' in params:
         url = '%s/%s' % (url, params.get('prepid'))
     else:
         return {}
     params = {}
     result = getdata(url, params, headers, expire, post,
             self.error_expire, self.verbose, self.ckey, self.cert,
             doseq=False, system=self.name)
     return result
Example #36
def worker_helper(url, query, table='runsummary'):
    """
    Query RunRegistry service, see documentation at
    https://twiki.cern.ch/twiki/bin/viewauth/CMS/DqmRrApi
    url=http://runregistry.web.cern.ch/runregistry/
    """
    workspace = 'GLOBAL'
    template = 'json'
    if  table == 'runsummary':
        columns = ['number', 'startTime', 'stopTime', 'triggers',
                   'runClassName', 'runStopReason', 'bfield', 'gtKey',
                   'l1Menu', 'hltKeyDescription', 'lhcFill', 'lhcEnergy',
                   'runCreated', 'modified', 'lsCount', 'lsRanges']
    elif table == 'runlumis':
        columns = ['sectionFrom', 'sectionTo', 'runNumber']
    sdata = {'filter':query}
    path = 'api/%s/%s/%s/%s/none/data' \
                % (workspace, table, template, urllib.quote(','.join(columns)))
    callurl = os.path.join(url, path)
    result, _ = getdata(callurl, sdata, post=True)
    record = json.load(result)
    result.close()
    notations = {'lsRanges':'lumi_section_ranges',
            'number':'run_number', 'runCreated':'create_time',
            'runNumber': 'run_number',
            'stopTime': 'end_time', 'startTime': 'start_time',
            'lsCount': 'lumi_sections', 'runStopReason': 'stop_reason',
            'hltKeyDescription': 'hltkey', 'gtKey': 'gtkey',
            'lhcEnergy': 'beam_e', 'l1Menu': 'l1key',
            'modified': 'modify_time', 'runClassName': 'group_name'}
    for rec in record:
        for key, val in rec.items():
            if  key in notations:
                rec[notations[key]] = val
                del rec[key]
        if  table == 'runsummary':
            yield dict(run=rec)
        elif table == 'runlumis':
            if  'sectionTo' in rec and 'sectionFrom' in rec:
                rec['number'] = [i for i in \
                        range(rec.pop('sectionFrom'), rec.pop('sectionTo')+1)]
            yield dict(lumi=rec)
Example #37
def dbs_find(entity, url, kwds, verbose=0):
    "Find DBS3 entity for given set of parameters"
    if  entity not in ['run', 'file', 'block']:
        msg = 'Unsupported entity key=%s' % entity
        raise Exception(msg)
    expire  = 600
    dataset = kwds.get('dataset', None)
    block   = kwds.get('block_name', None)
    if  not block:
        # TODO: this should go away when DBS is retired (used in the combined srv)
        block = kwds.get('block', None)
    lfn     = kwds.get('file', None)
    runs    = kwds.get('runs', [])
    if  not (dataset or block or lfn):
        return
    url = '%s/%ss' % (url, entity) # DBS3 APIs use plural entity value
    if  dataset:
        params = {'dataset':dataset}
    elif block:
        params = {'block_name': block}
    elif lfn:
        params = {'logical_file_name': lfn}
    if  runs:
        params.update({'run_num': runs})
    headers = {'Accept': 'application/json;text/json'}
    source, expire = \
        getdata(url, params, headers, expire, ckey=CKEY, cert=CERT,
                verbose=verbose)
    for row in json_parser(source, None):
        for rec in row:
            try:
                if  isinstance(rec, basestring):
                    print(dastimestamp('DBS3 ERROR:'), row)
                elif  entity == 'file':
                    yield rec['logical_file_name']
                elif  entity == 'block':
                    yield rec['block_name']
                elif  entity == 'run':
                    yield rec['run_num']
            except Exception as exp:
                msg = 'Failed to parse "%s", exception="%s"' % (rec, exp)
                print_exc(msg)
Example #38
def proxy_getdata(urls):
    "Get data for given URLs via proxy server"
    try:
        result = [r for r in urlfetch_proxy([])]
    except Exception as _exc:
        result = []
    if  len(result) == 1 and result[0] == {'ping':'pong'}:
        for row in urlfetch_proxy(urls):
            yield row
    else: # sequential access
        error_expire = 60
        expire = 60
        post = False
        verbose = False
        params = {}
        headers = {}
        for url in urls:
            data, _ = getdata(url, params, headers, expire, post,
                            error_expire, verbose, CKEY, CERT)
            yield data.read()
Example #39
def proxy_getdata(urls):
    "Get data for given URLs via proxy server"
    try:
        result = [r for r in urlfetch_proxy([])]
    except Exception as _exc:
        result = []
    if len(result) == 1 and result[0] == {'ping': 'pong'}:
        for row in urlfetch_proxy(urls):
            yield row
    else:  # sequential access
        error_expire = 60
        expire = 60
        post = False
        verbose = False
        params = {}
        headers = {}
        for url in urls:
            data, _ = getdata(url, params, headers, expire, post, error_expire,
                              verbose, CKEY, CERT)
            yield data.read()
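The call with an empty URL list acts as a health probe: a proxy that answers {'ping': 'pong'} is considered alive and receives the real request, otherwise the code falls back to sequential fetches. The handshake in isolation (proxy_alive and the lambda fetcher are illustrative stand-ins for urlfetch_proxy):

def proxy_alive(fetch):
    "Probe a proxy fetcher with an empty URL list, expect a ping/pong reply"
    try:
        result = list(fetch([]))
    except Exception:
        return False
    return len(result) == 1 and result[0] == {'ping': 'pong'}

print(proxy_alive(lambda urls: iter([{'ping': 'pong'}])))  # True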
Example #40
def datasets_dbs2(urls, verbose=0):
    """DBS2 implementation of datasets function"""
    headers = {'Accept':'application/xml;text/xml'}
    records = []
    url     = urls.get('dbs')
    query   = \
        'find dataset,dataset.tier,dataset.era where dataset.status like VALID*'
    params  = {'api':'executeQuery', 'apiversion':'DBS_2_0_9', 'query':query}
    stream, _ = getdata(url, params, headers, post=False, \
            ckey=CKEY, cert=CERT, verbose=verbose, system='dbs')
    records = [r for r in qlxml_parser(stream, 'dataset')]
    stream.close()
    dbsdata = {}
    for row in records:
        dataset = row['dataset']
        if  dataset['dataset'] not in dbsdata:
            dbsdata[dataset['dataset']] = \
                dict(era=dataset['dataset.era'],
                        tier=dataset['dataset.tier'], status='VALID')
    for row in phedex_info(urls, dbsdata):
        yield row
Example #41
def dbs_find(entity, url, kwds, verbose=0):
    "Find DBS3 entity for given set of parameters"
    if entity not in ["run", "file", "block"]:
        msg = "Unsupported entity key=%s" % entity
        raise Exception(msg)
    expire = 600
    dataset = kwds.get("dataset", None)
    block = kwds.get("block_name", None)
    if not block:
        # TODO: this should go away when DBS is retired (used in the combined srv)
        block = kwds.get("block", None)
    lfn = kwds.get("file", None)
    runs = kwds.get("runs", [])
    if not (dataset or block or lfn):
        return
    url = "%s/%ss" % (url, entity)  # DBS3 APIs use plural entity value
    if dataset:
        params = {"dataset": dataset}
    elif block:
        params = {"block_name": block}
    elif lfn:
        params = {"logical_file_name": lfn}
    if runs:
        params.update({"run_num": runrange(runs[0], runs[-1], False)})
    headers = {"Accept": "application/json;text/json"}
    source, expire = getdata(url, params, headers, expire, ckey=CKEY, cert=CERT, verbose=verbose)
    for row in json_parser(source, None):
        for rec in row:
            try:
                if isinstance(rec, basestring):
                    print(dastimestamp("DBS3 ERROR:"), row)
                elif entity == "file":
                    yield rec["logical_file_name"]
                elif entity == "block":
                    yield rec["block_name"]
                elif entity == "run":
                    yield rec["run_num"]
            except Exception as exp:
                msg = 'Failed to parse "%s", exception="%s"' % (rec, exp)
                print_exc(msg)
Example #42
def get_ids(url, params, dataset, verbose=False):
    "Query either ReqMgr or WMStats and retrieve request ids"
    headers = {'Accept': 'application/json;text/json'}
    expire = 600 # dummy number, we don't need it here
    ids = []
    source, expire = \
        getdata(url, params, headers, expire, ckey=CKEY, cert=CERT,
                verbose=verbose)
    for row in json_parser(source, None):
        for rec in row.get('rows', []):
            doc = rec['doc']
            if  not doc:
                continue
            if  'ProcConfigCacheID' in doc:
                ids.append(doc['ProcConfigCacheID'])
            elif 'ConfigCacheID' in doc:
                ids.append(doc['ConfigCacheID'])
            elif 'SkimConfigCacheID' in doc:
                ids.append(doc['SkimConfigCacheID'])
            else:
                if  'id' in rec and 'key' in rec and rec['key'] == dataset:
                    ids.append(rec['id'])
    return ids
Example #43
def get_ids(url, params, dataset, verbose=False):
    "Query either ReqMgr2 or WMStats and retrieve request ids"
    headers = {'Accept': 'application/json;text/json'}
    expire = 600  # dummy number, we don't need it here
    ids = []
    source, expire = \
        getdata(url, params, headers, expire, ckey=CKEY, cert=CERT,
                verbose=verbose)
    for row in json_parser(source, None):
        for rec in row.get('rows', []):
            doc = rec['doc']
            found = 0
            if not doc:
                continue
            for key in doc.keys():
                if key.endswith("ConfigCacheID"):
                    ids.append(doc[key])
                    found += 1
            if not found:
                if 'id' in rec and 'key' in rec and rec['key'] == dataset:
                    if rec['id']:
                        ids.append(rec['id'])
    return ids
Example #44
def get_ids(url, params, dataset, verbose=False):
    "Query either ReqMgr or WMStats and retrieve request ids"
    headers = {'Accept': 'application/json;text/json'}
    expire = 600  # dummy number, we don't need it here
    ids = []
    source, expire = \
        getdata(url, params, headers, expire, ckey=CKEY, cert=CERT,
                verbose=verbose)
    for row in json_parser(source, None):
        for rec in row.get('rows', []):
            doc = rec['doc']
            if not doc:
                continue
            if 'ProcConfigCacheID' in doc:
                ids.append(doc['ProcConfigCacheID'])
            elif 'ConfigCacheID' in doc:
                ids.append(doc['ConfigCacheID'])
            elif 'SkimConfigCacheID' in doc:
                ids.append(doc['SkimConfigCacheID'])
            else:
                if 'id' in rec and 'key' in rec and rec['key'] == dataset:
                    ids.append(rec['id'])
    return ids
Example #45
def site4dataset(dbs_url, phedex_api, args, expire):
    "Yield site information about given dataset"
    # DBS part
    dataset = args['dataset']
    try:
        totblocks, totfiles = dataset_summary(dbs_url, dataset)
    except Exception as err:
        error = 'combined service unable to process your request'
        reason = "Fail to parse #block, #files info, %s" % str(err)
        yield {
            'site': {
                'name': 'N/A',
                'se': 'N/A',
                'error': error,
                'reason': reason
            }
        }
        return
    # Phedex part
    phedex_args = {'dataset': args['dataset']}
    headers = {'Accept': 'text/xml'}
    source, expire = \
        getdata(phedex_api, phedex_args, headers, expire, system='phedex')
    prim_key = 'block'
    tags = 'block.replica.node'
    site_info = {}
    for rec in xml_parser(source, prim_key, tags):
        ddict = DotDict(rec)
        replicas = ddict.get('block.replica')
        if not isinstance(replicas, list):
            replicas = [replicas]
        for row in replicas:
            if not row or 'node' not in row:
                continue
            node = row['node']
            files = int(row['files'])
            complete = 1 if row['complete'] == 'y' else 0
            if node in site_info:
                files = site_info[node]['files'] + files
                nblks = site_info[node]['blocks'] + 1
                bc_val = site_info[node]['blocks_complete']
                b_complete = bc_val + 1 if complete else bc_val
            else:
                b_complete = 1 if complete else 0
                nblks = 1
            site_info[node] = {
                'files': files,
                'blocks': nblks,
                'blocks_complete': b_complete
            }
    row = {}
    for key, val in site_info.items():
        if totfiles:
            nfiles = '%5.2f%%' % (100 * float(val['files']) / totfiles)
        else:
            nfiles = 'N/A'
        if totblocks:
            nblks = '%5.2f%%' % (100 * float(val['blocks']) / totblocks)
        else:
            nblks = 'N/A'
        ratio = float(val['blocks_complete']) / val['blocks']
        b_completion = '%5.2f%%' % (100 * ratio)
        row = {
            'site': {
                'name': key,
                'dataset_fraction': nfiles,
                'block_fraction': nblks,
                'block_completion': b_completion
            }
        }
        yield row
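The totfiles/totblocks guards avoid a division by zero when DBS reports an empty dataset; the formatting idiom in isolation (fraction is an illustrative name):

def fraction(part, total):
    "Percentage string in the '%5.2f%%' style above, or 'N/A' for a zero total"
    return '%5.2f%%' % (100 * float(part) / total) if total else 'N/A'

print(fraction(25, 200))  # ' 12.50%'
print(fraction(25, 0))    # N/A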
Example #46
 def helper(self, api, args, expire):
     """
      Class helper function which yields results for a given
      set of input parameters. It yields data records which
      must contain a combined attribute corresponding to the
      systems used to produce the record content.
     """
     dbs_url = self.map[api]['services'][self.dbs]
     phedex_url = self.map[api]['services']['phedex']
     # make phedex_api from url, but use xml version for processing
     phedex_api = phedex_url.replace('/json/', '/xml/') + '/blockReplicas'
     if  api == 'dataset4site_release' or \
         api == 'dataset4site_release_parent' or \
         api == 'child4site_release_dataset':
         # DBS part
         datasets = set()
         release = args['release']
         parent = args.get('parent', None)
         for row in dbs_dataset4release_parent(dbs_url, release, parent):
             datasets.add(row)
         # Phedex part
         if args['site'].find('.') != -1:  # it is SE
             phedex_args = {
                 'dataset': list(datasets),
                 'se': '%s' % args['site']
             }
         else:
             phedex_args = {
                 'dataset': list(datasets),
                 'node': '%s*' % args['site']
             }
         headers = {'Accept': 'text/xml'}
         source, expire = \
             getdata(phedex_api, phedex_args, headers, expire, system='phedex')
         prim_key = 'block'
         tags = 'block.replica.node'
         found = {}
         for rec in xml_parser(source, prim_key, tags):
             ddict = DotDict(rec)
             block = ddict.get('block.name')
             bbytes = ddict.get('block.bytes')
             files = ddict.get('block.files')
             found_dataset = block.split('#')[0]
             if found_dataset in found:
                 val = found[found_dataset]
                 found[found_dataset] = {
                     'bytes': val['bytes'] + bbytes,
                     'files': val['files'] + files
                 }
             else:
                 found[found_dataset] = {'bytes': bbytes, 'files': files}
         for name, val in found.items():
             record = dict(name=name, size=val['bytes'], files=val['files'])
             if api == 'child4site_release_dataset':
                 yield {'child': record}
             else:
                 yield {'dataset': record}
         del datasets
         del found
     if api == 'site4dataset':
         try:
             gen = site4dataset(dbs_url, phedex_api, args, expire)
             for row in gen:
                 sname = row.get('site', {}).get('name', '')
                 skind = self.site_info(phedex_url, sname)
                 row['site'].update({'kind': skind})
                 yield row
         except Exception as err:
             print_exc(err)
             tstamp = dastimestamp('')
             msg = tstamp + ' Exception while processing DBS/Phedex info:'
             msg += str(err)
             row = {
                 'site': {
                     'name': 'Fail to look-up site info',
                     'error': msg,
                     'dataset_fraction': 'N/A',
                     'block_fraction': 'N/A',
                     'block_completion': 'N/A'
                 },
                 'error': msg
             }
             yield row
     if  api == 'files4dataset_runs_site' or \
         api == 'files4block_runs_site':
         run_value = args.get('run', [])
         if isinstance(run_value, dict) and '$in' in run_value:
             runs = run_value['$in']
         elif isinstance(run_value, list):
             runs = run_value
         else:
             if int_number_pattern.match(str(run_value)):
                 runs = [run_value]
             else:
                 runs = []
         args.update({'runs': runs})
         files = dbs_find('file', dbs_url, args)
         site = args.get('site')
         phedex_api = phedex_url.replace('/json/',
                                         '/xml/') + '/fileReplicas'
         for fname in files4site(phedex_api, files, site):
             yield {'file': {'name': fname}}
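The run-argument normalization near the end accepts a MongoDB-style {'$in': [...]} dict, a plain list, or a single run number. The same logic in isolation (normalize_runs is an illustrative name and the regex stands in for DAS's int_number_pattern):

import re

int_number_pattern = re.compile(r'^\d+$')  # stand-in for the DAS pattern

def normalize_runs(run_value):
    "Coerce a run spec (dict with '$in', list, or scalar) into a flat list"
    if isinstance(run_value, dict) and '$in' in run_value:
        return run_value['$in']
    if isinstance(run_value, list):
        return run_value
    if int_number_pattern.match(str(run_value)):
        return [run_value]
    return []

print(normalize_runs({'$in': [176201, 176202]}))  # [176201, 176202]
print(normalize_runs(176201))                     # [176201]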