Example #1
def findReqMgrIds(dataset, base='https://cmsweb.cern.ch', verbose=False):
    """
    Find ReqMgrIds for a given dataset. This is a rather complex procedure in CMS.
    We need to query ReqMgr data-service cache and find workflow ids by
    outputdataset name. The ReqMgr returns either a document with the ids used
    by MCM (i.e. ProcConfigCacheID, ConfigCacheID, SkimConfigCacheID) or we can
    take the id of the request, which bypasses MCM. For references see these
    discussions:
    https://github.com/dmwm/DAS/issues/4045
    https://hypernews.cern.ch/HyperNews/CMS/get/dmDevelopment/1501/1/1/1/1.html
    """
    params = {'key': '"%s"' % dataset, 'include_docs':'true'}
    url = "%s/couchdb/reqmgr_workload_cache/_design/ReqMgr/_view/byoutputdataset" \
        % base
    headers = {'Accept': 'application/json;text/json'}
    expire = 600 # dummy number, we don't need it here
    source, expire = \
        getdata(url, params, headers, expire, ckey=CKEY, cert=CERT,
                verbose=verbose)
    ids = []
    for row in json_parser(source, None):
        for rec in row.get('rows', []):
            doc = rec['doc']
            if  'ProcConfigCacheID' in doc:
                ids.append(doc['ProcConfigCacheID'])
            elif 'ConfigCacheID' in doc:
                ids.append(doc['ConfigCacheID'])
            elif 'SkimConfigCacheID' in doc:
                ids.append(doc['SkimConfigCacheID'])
            else:
                if  'id' in rec and 'key' in rec and rec['key'] == dataset:
                    ids.append(rec['id'])
    return ids
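
A minimal usage sketch for the function above; the dataset path is a placeholder, and getdata, json_parser, CKEY and CERT are assumed to be the module-level helpers this code already references:

if __name__ == '__main__':
    # hypothetical dataset path, purely illustrative
    dataset = '/A/B-C-v1/GEN-SIM'
    for reqid in findReqMgrIds(dataset, verbose=True):
        print(reqid)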
Example #2
def dbs_dataset4site_release(dbs_url, release):
    "Get dataset for given site and release"
    expire = 600 # set some expire since we're not going to use it
    if  which_dbs(dbs_url) == 'dbs':
        # DBS2 branch; in DBS3 we use the datasets API and pass the release there
        query = 'find dataset where release=%s' % release
        dbs_args = {'api':'executeQuery', 'apiversion': 'DBS_2_0_9', \
                    'query':query}
        headers = {'Accept': 'text/xml'}
        source, expire = \
            getdata(dbs_url, dbs_args, headers, expire, ckey=CKEY, cert=CERT,
                    system='dbs')
        prim_key = 'dataset'
        for row in qlxml_parser(source, prim_key):
            if  'dataset' in row:
                dataset = row['dataset']['dataset']
                yield dataset
            elif 'error' in row:
                err = row.get('reason', None)
                err = err if err else row['error']
                yield 'DBS error: %s' % err
    else:
        # we call datasets?release=release to get list of datasets
        dbs_url += '/datasets'
        dbs_args = \
        {'release_version': release, 'dataset_access_type':'VALID'}
        headers = {'Accept': 'application/json;text/json'}
        source, expire = \
            getdata(dbs_url, dbs_args, headers, expire, ckey=CKEY, cert=CERT,
                    system='dbs3')
        for rec in json_parser(source, None):
            for row in rec:
                yield row['dataset']
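
Both branches above dispatch on which_dbs(dbs_url), a helper that is not shown in these examples. A plausible sketch, assuming the DBS generation can be guessed from the endpoint URL (an illustration, not the DAS original):

def which_dbs(dbs_url):
    "Hypothetical heuristic: guess the DBS generation from the service URL"
    if 'servlet' in dbs_url.lower():
        return 'dbs'  # DBS2-style servlet endpoint
    return 'dbs3'     # REST endpoint, e.g. https://cmsweb.cern.ch/dbs/prod/global/DBSReader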
Example #3
 def parser(self, dasquery, dformat, data, api):
     "DAS parser for MCM data-service"
     prim_key  = self.dasmapping.primary_key(self.name, api)
     gen       = json_parser(data, self.logger)
     counter   = 0
     for rec in gen:
         if  'results' in rec:
             row = rec['results']
         else:
             row = rec
         for key in ['_id', '_rev']:
             if  key in row:
                 del row[key]
         if  row:
             if  api == 'dataset4mcm':
                 for val in row.values():
                     if  isinstance(val, basestring):
                         yield {'dataset':{'name': val}}
                     elif isinstance(val, list):
                         for vvv in val:
                             yield {'dataset':{'name': vvv}}
             else:
                 yield {'mcm':row}
         counter += 1
     msg  = "api=%s, format=%s " % (api, dformat)
     msg += "prim_key=%s yield %s rows" % (prim_key, counter)
     self.logger.info(msg)
Example #4
def dataset_summary(dbs_url, getdata, dataset):
    """
    Invoke DBS2/DBS3 call to get information about total
    number of files/blocks in a given dataset.
    """
    expire = 600 # set some expire since we're not going to use it
    if  which_dbs(dbs_url) == 'dbs2':
        # DBS2 call
        query = 'find count(file.name), count(block.name) where dataset=%s'\
                 % dataset
        dbs_args = {'api':'executeQuery', 'apiversion': 'DBS_2_0_9', \
                    'query':query}
        headers = {'Accept': 'text/xml'}
        source, expire = getdata(dbs_url, dbs_args, expire, headers)
        prim_key = 'dataset'
        datasets = set()
        for row in qlxml_parser(source, prim_key):
            totfiles  = row['dataset']['count_file.name']
            totblocks = row['dataset']['count_block.name']
            return totblocks, totfiles
    else:
        # we call filesummaries?dataset=dataset to get number of files/blks
        dbs_args = {'dataset': dataset}
        headers = {'Accept': 'application/json;text/json'}
        source, expire = getdata(dbs_url, dbs_args, expire, headers)
        for row in json_parser(source, None):
            totfiles  = row[0]['num_file']
            totblocks = row[0]['num_block']
            return totblocks, totfiles
Example #5
 def parser(self, dasquery, dformat, data, api):
     "DAS parser for MCM data-service"
     prim_key = self.dasmapping.primary_key(self.name, api)
     gen = json_parser(data, self.logger)
     counter = 0
     for rec in gen:
         if 'results' in rec:
             row = rec['results']
         else:
             row = rec
         for key in ['_id', '_rev']:
             if key in row:
                 del row[key]
         if row:
             if api == 'dataset4mcm':
                 for val in row.values():
                     if isinstance(val, basestring):
                         yield {'dataset': {'name': val}}
                     elif isinstance(val, list):
                         for vvv in val:
                             yield {'dataset': {'name': vvv}}
             else:
                 yield {'mcm': row}
         counter += 1
     msg = "api=%s, format=%s " % (api, dformat)
     msg += "prim_key=%s yield %s rows" % (prim_key, counter)
     self.logger.info(msg)
Example #6
    def parser(self, dasquery, dformat, data, api):
        """
        DAS data parser. Input parameters:

        - *query* input DAS query
        - *dformat* is a data format, e.g. XML, JSON
        - *data* is a data source, either file-like object or
          actual data
        - *api* is API name
        """
        prim_key  = self.dasmapping.primary_key(self.name, api)
        apitag    = self.dasmapping.apitag(self.name, api)
        counter   = 0
        if  dformat.lower() == 'xml':
            tags = self.dasmapping.api2daskey(self.name, api)
            gen  = xml_parser(data, prim_key, tags)
            for row in gen:
                counter += 1
                yield row
        elif dformat.lower() == 'json' or dformat.lower() == 'dasjson':
            gen  = json_parser(data, self.logger)
            das_dict = {}
            for row in gen:
                if  dformat.lower() == 'dasjson':
                    for key, val in row.iteritems():
                        if  key != 'results':
                            das_dict[key] = val
                    row = row['results']
                    self.analytics.update_apicall(\
                        dasquery.mongo_query, das_dict)
                if  apitag and row.has_key(apitag):
                    row = row[apitag]
                if  isinstance(row, list):
                    for item in row:
                        if  item.has_key(prim_key):
                            counter += 1
                            yield item
                        else:
                            counter += 1
                            yield {prim_key:item}
                else:
                    if  row.has_key(prim_key):
                        counter += 1
                        yield row
                    else:
                        counter += 1
                        yield {prim_key:row}
        else:
            msg = 'Unsupported data format="%s", API="%s"' % (dformat, api)
            raise Exception(msg)
        msg  = "api=%s, format=%s " % (api, dformat)
        msg += "prim_key=%s yield %s rows" % (prim_key, counter)
        self.logger.info(msg)
Example #7
    def parser(self, dasquery, dformat, data, api):
        """
        DAS data parser. Input parameters:

        - *query* input DAS query
        - *dformat* is a data format, e.g. XML, JSON
        - *data* is a data source, either file-like object or
          actual data
        - *api* is API name
        """
        prim_key = self.dasmapping.primary_key(self.name, api)
        counter = 0
        if dformat.lower() == 'xml':
            tags = self.dasmapping.api2daskey(self.name, api)
            gen = xml_parser(data, prim_key, tags)
            for row in gen:
                counter += 1
                yield row
        elif dformat.lower() == 'json' or dformat.lower() == 'dasjson':
            gen = json_parser(data, self.logger)
            das_dict = {}
            for row in gen:
                if dformat.lower() == 'dasjson':
                    for key, val in row.items():
                        if key != 'results':
                            das_dict[key] = val
                    row = row['results']
                if isinstance(row, list):
                    for item in row:
                        if item:
                            if prim_key in item:
                                counter += 1
                                yield item
                            else:
                                counter += 1
                                yield {prim_key: item}
                else:
                    if prim_key in row:
                        counter += 1
                        yield row
                    else:
                        counter += 1
                        yield {prim_key: row}
        else:
            msg = 'Unsupported data format="%s", API="%s"' % (dformat, api)
            raise Exception(msg)
        msg = "api=%s, format=%s " % (api, dformat)
        msg += "prim_key=%s yield %s rows" % (prim_key, counter)
        self.logger.info(msg)
Example #8
 def test_json_parser(self):
     """
     Test functionality of json_parser
     """
     jsondata = {'beer': {'amstel':'good', 'guiness':'better'}}
     fdesc  = tempfile.NamedTemporaryFile()
     fname  = fdesc.name
     stream = file(fname, 'w')
     stream.write(json.dumps(jsondata))
     stream.close()
     stream = file(fname, 'r')
     gen    = json_parser(stream)
     result = gen.next()
     expect = {'beer': {'amstel': 'good', 'guiness': 'better'}}
     self.assertEqual(expect, result)
Example #9
 def test_json_parser(self):
     """
     Test functionality of json_parser
     """
     jsondata = {'beer': {'amstel': 'good', 'guiness': 'better'}}
     fdesc = tempfile.NamedTemporaryFile()
     fname = fdesc.name
     stream = open(fname, 'w')
     stream.write(json.dumps(jsondata))
     stream.close()
     stream = open(fname, 'r')
     gen = json_parser(stream)
     result = next(gen)
     expect = {'beer': {'amstel': 'good', 'guiness': 'better'}}
     self.assertEqual(expect, result)
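
Both test variants rely on json_parser accepting a file-like object and yielding parsed documents, with the logger argument apparently optional. A standalone sketch of that behavior (json_parser itself comes from the module under test):

import io
import json

data = {'beer': {'amstel': 'good', 'guiness': 'better'}}
stream = io.StringIO(json.dumps(data))
assert next(json_parser(stream)) == data  # json_parser from the module under test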
Example #10
    def parser(self, dasquery, dformat, data, api):
        """
        DAS data parser. Input parameters:

        - *query* input DAS query
        - *dformat* is a data format, e.g. XML, JSON
        - *data* is a data source, either file-like object or
          actual data
        - *api* is API name
        """
        prim_key = self.dasmapping.primary_key(self.name, api)
        counter = 0
        if dformat.lower() == "xml":
            tags = self.dasmapping.api2daskey(self.name, api)
            gen = xml_parser(data, prim_key, tags)
            for row in gen:
                counter += 1
                yield row
        elif dformat.lower() == "json" or dformat.lower() == "dasjson":
            gen = json_parser(data, self.logger)
            das_dict = {}
            for row in gen:
                if dformat.lower() == "dasjson":
                    for key, val in row.iteritems():
                        if key != "results":
                            das_dict[key] = val
                    row = row["results"]
                if isinstance(row, list):
                    for item in row:
                        if prim_key in item:
                            counter += 1
                            yield item
                        else:
                            counter += 1
                            yield {prim_key: item}
                else:
                    if prim_key in row:
                        counter += 1
                        yield row
                    else:
                        counter += 1
                        yield {prim_key: row}
        else:
            msg = 'Unsupported data format="%s", API="%s"' % (dformat, api)
            raise Exception(msg)
        msg = "api=%s, format=%s " % (api, dformat)
        msg += "prim_key=%s yield %s rows" % (prim_key, counter)
        self.logger.info(msg)
Example #11
def dbs_dataset4release_parent(dbs_url, release, parent=None):
    "Get dataset for given release and optional parent dataset"
    expire = 600  # set some expire since we're not going to use it
    # we call datasets?release=release to get list of datasets
    dbs_url += '/datasets'
    dbs_args = \
    {'release_version': release, 'dataset_access_type':'VALID'}
    if parent:
        dbs_args.update({'parent_dataset': parent})
    headers = {'Accept': 'application/json;text/json'}
    source, expire = \
        getdata(dbs_url, dbs_args, headers, expire, ckey=CKEY, cert=CERT,
                system='dbs3')
    for rec in json_parser(source, None):
        for row in rec:
            yield row['dataset']
Example #12
def dbs_dataset4release_parent(dbs_url, release, parent=None):
    "Get dataset for given release and optional parent dataset"
    expire = 600 # set some expire since we're not going to use it
    # we call datasets?release=release to get list of datasets
    dbs_url += '/datasets'
    dbs_args = \
    {'release_version': release, 'dataset_access_type':'VALID'}
    if  parent:
        dbs_args.update({'parent_dataset': parent})
    headers = {'Accept': 'application/json;text/json'}
    source, expire = \
        getdata(dbs_url, dbs_args, headers, expire, ckey=CKEY, cert=CERT,
                system='dbs3')
    for rec in json_parser(source, None):
        for row in rec:
            yield row['dataset']
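
A usage sketch for the generator above; the URL and release are placeholders, and CKEY/CERT must point at valid grid credentials:

dbs_url = 'https://cmsweb.cern.ch/dbs/prod/global/DBSReader'
for dataset in dbs_dataset4release_parent(dbs_url, 'CMSSW_7_1_1'):
    print(dataset)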
Example #13
 def parser(self, query, dformat, source, api):
     """
     DBS3 data-service parser.
     """
     if  api == 'site4dataset':
         sites = set()
         for rec in json_parser(source, self.logger):
             if  isinstance(rec, list):
                 for row in rec:
                     orig_site = row['origin_site_name']
                     if  orig_site not in sites:
                         sites.add(orig_site)
             else:
                 orig_site = rec.get('origin_site_name', None)
                 if  orig_site and orig_site not in sites:
                     sites.add(orig_site)
         for site in sites:
             yield {'site': {'name': site}}
     elif api == 'filesummaries':
         gen = DASAbstractService.parser(self, query, dformat, source, api)
         for row in gen:
             yield row['dataset']
     elif api == 'blockparents':
         gen = DASAbstractService.parser(self, query, dformat, source, api)
         for row in gen:
             try:
                 del row['parent']['this_block_name']
             except:
                 pass
             yield row
     elif api == 'fileparents':
         gen = DASAbstractService.parser(self, query, dformat, source, api)
         for row in gen:
             parent = row['parent']
             for val in parent['parent_logical_file_name']:
                 yield dict(name=val)
     elif api == 'filechildren':
         gen = DASAbstractService.parser(self, query, dformat, source, api)
         for row in gen:
             parent = row['child']
             for val in parent['child_logical_file_name']:
                 yield dict(name=val)
     else:
         gen = DASAbstractService.parser(self, query, dformat, source, api)
         for row in gen:
             yield row
Example #14
 def site_info(self, phedex_url, site):
     "Return Phedex site info about given site (rely on local cache)"
     if  abs(self.sites.get('tstamp') - time.time()) > self.thr \
             or site not in self.sites:
         # need to update the cache
         # use Phedex API https://cmsweb.cern.ch/phedex/datasvc/json/prod/nodes
         expire = self.thr
         args = {}
         api = phedex_url + '/nodes'
         headers = {'Accept': 'application/json;text/json'}
         source, expire = \
             getdata(api, args, headers, expire, system='phedex')
         self.sites['tstamp'] = time.time()
         for rec in json_parser(source, None):
             for row in rec['phedex']['node']:
                 self.sites[row['name']] = row['kind']
     return self.sites.get(site, 'NA')
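
The guard at the top of site_info is a plain timestamp cache. A self-contained sketch of the same pattern, with the threshold and the sites dict recreated locally (none of this touches the Phedex service):

import time

THR = 3600             # cache lifetime in seconds, illustrative
sites = {'tstamp': 0}  # same shape as self.sites above

def stale(site):
    "True when the cache expired or the site is unknown"
    return abs(sites.get('tstamp') - time.time()) > THR or site not in sites

if stale('T2_CH_CERN'):
    sites['tstamp'] = time.time()
    sites['T2_CH_CERN'] = 'Disk'  # placeholder value for the node kind
print(sites.get('T2_CH_CERN', 'NA'))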
Example #15
def dataset_summary(dbs_url, dataset):
    """
    Invoke DBS2/DBS3 call to get information about total
    number of files/blocks in a given dataset.
    """
    expire = 600  # set some expire since we're not going to use it
    # we call filesummaries?dataset=dataset to get number of files/blks
    dbs_url += '/filesummaries'
    dbs_args = {'dataset': dataset, 'validFileOnly': 1}
    headers = {'Accept': 'application/json;text/json'}
    source, expire = \
        getdata(dbs_url, dbs_args, headers, expire, ckey=CKEY, cert=CERT,
                system='dbs3')
    for row in json_parser(source, None):
        totfiles = row[0]['num_file']
        totblocks = row[0]['num_block']
        return totblocks, totfiles
Example #16
def dataset_summary(dbs_url, dataset):
    """
    Invoke DBS2/DBS3 call to get information about total
    number of files/blocks in a given dataset.
    """
    expire = 600 # set some expire since we're not going to use it
    # we call filesummaries?dataset=dataset to get number of files/blks
    dbs_url += '/filesummaries'
    dbs_args = {'dataset': dataset, 'validFileOnly': 1}
    headers = {'Accept': 'application/json;text/json'}
    source, expire = \
        getdata(dbs_url, dbs_args, headers, expire, ckey=CKEY, cert=CERT,
                system='dbs3')
    for row in json_parser(source, None):
        totfiles  = row[0]['num_file']
        totblocks = row[0]['num_block']
        return totblocks, totfiles
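
Calling the DBS3-only variant above is a one-liner; the URL and dataset path are placeholders:

dbs_url = 'https://cmsweb.cern.ch/dbs/prod/global/DBSReader'
totblocks, totfiles = dataset_summary(dbs_url, '/A/B-C-v1/GEN-SIM')
print(totblocks, totfiles)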
Example #17
 def site_info(self, phedex_url, site):
     "Return Phedex site info about given site (rely on local cache)"
     if  abs(self.sites.get('tstamp') - time.time()) > self.thr \
             or site not in self.sites:
         # need to update the cache
         # use Phedex API https://cmsweb.cern.ch/phedex/datasvc/json/prod/nodes
         expire = self.thr
         args = {}
         api = phedex_url + '/nodes'
         headers = {'Accept': 'application/json;text/json'}
         source, expire = \
             getdata(api, args, headers, expire, system='phedex')
         self.sites['tstamp'] = time.time()
         for rec in json_parser(source, None):
             for row in rec['phedex']['node']:
                 self.sites[row['name']] = row['kind']
     return self.sites.get(site, 'NA')
Example #18
def dbs_find(entity, url, kwds, verbose=0):
    "Find DBS3 entity for given set of parameters"
    if  entity not in ['run', 'file', 'block']:
        msg = 'Unsupported entity key=%s' % entity
        raise Exception(msg)
    expire  = 600
    dataset = kwds.get('dataset', None)
    block   = kwds.get('block_name', None)
    if  not block:
        # TODO: this should go away when DBS is retired (used in combined srv)
        block = kwds.get('block', None)
    lfn     = kwds.get('file', None)
    runs    = kwds.get('runs', [])
    if  not (dataset or block or lfn):
        return
    url = '%s/%ss' % (url, entity) # DBS3 APIs use plural entity value
    if  dataset:
        params = {'dataset':dataset}
    elif block:
        params = {'block_name': block}
    elif lfn:
        params = {'logical_file_name': lfn}
    if  runs:
        params.update({'run_num': runs})
    headers = {'Accept': 'application/json;text/json'}
    source, expire = \
        getdata(url, params, headers, expire, ckey=CKEY, cert=CERT,
                verbose=verbose)
    for row in json_parser(source, None):
        for rec in row:
            try:
                if  isinstance(rec, basestring):
                    print(dastimestamp('DBS3 ERROR:'), row)
                elif  entity == 'file':
                    yield rec['logical_file_name']
                elif  entity == 'block':
                    yield rec['block_name']
                elif  entity == 'run':
                    yield rec['run_num']
            except Exception as exp:
                msg = 'Fail to parse "%s", exception="%s"' % (rec, exp)
                print_exc(msg)
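
A usage sketch for dbs_find; entity must be one of run/file/block, and the URL and dataset path below are illustrative only:

url = 'https://cmsweb.cern.ch/dbs/prod/global/DBSReader'
kwds = {'dataset': '/A/B-C-v1/GEN-SIM'}
for lfn in dbs_find('file', url, kwds, verbose=1):
    print(lfn)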
Example #19
def dbs_find(entity, url, kwds, verbose=0):
    "Find DBS3 entity for given set of parameters"
    if entity not in ["run", "file", "block"]:
        msg = "Unsupported entity key=%s" % entity
        raise Exception(msg)
    expire = 600
    dataset = kwds.get("dataset", None)
    block = kwds.get("block_name", None)
    if not block:
        # TODO: this should go away when DBS is retired (used in combined srv)
        block = kwds.get("block", None)
    lfn = kwds.get("file", None)
    runs = kwds.get("runs", [])
    if not (dataset or block or lfn):
        return
    url = "%s/%ss" % (url, entity)  # DBS3 APIs use plural entity value
    if dataset:
        params = {"dataset": dataset}
    elif block:
        params = {"block_name": block}
    elif lfn:
        params = {"logical_file_name": lfn}
    if runs:
        params.update({"run_num": runrange(runs[0], runs[-1], False)})
    headers = {"Accept": "application/json;text/json"}
    source, expire = getdata(url, params, headers, expire, ckey=CKEY, cert=CERT, verbose=verbose)
    for row in json_parser(source, None):
        for rec in row:
            try:
                if isinstance(rec, basestring):
                    print(dastimestamp("DBS3 ERROR:"), row)
                elif entity == "file":
                    yield rec["logical_file_name"]
                elif entity == "block":
                    yield rec["block_name"]
                elif entity == "file":
                    yield rec["dataset"]
            except Exception as exp:
                msg = 'Fail to parse "%s", exception="%s"' % (rec, exp)
                print_exc(msg)
Example #20
def dataset_summary(dbs_url, dataset):
    """
    Invoke DBS2/DBS3 call to get information about total
    number of files/blocks in a given dataset.
    """
    expire = 600 # set some expire since we're not going to use it
    if  which_dbs(dbs_url) == 'dbs':
        # DBS2 call
        query  = 'find count(file.name), count(block.name)'
        query += ' where dataset=%s and dataset.status=*' % dataset
        dbs_args = {'api':'executeQuery', 'apiversion': 'DBS_2_0_9', \
                    'query':query}
        headers = {'Accept': 'text/xml'}
        source, expire = \
            getdata(dbs_url, dbs_args, headers, expire, ckey=CKEY, cert=CERT,
                    system='dbs')
        prim_key = 'dataset'
        for row in qlxml_parser(source, prim_key):
            if  'dataset' in row:
                totfiles  = row['dataset']['count_file.name']
                totblocks = row['dataset']['count_block.name']
                return totblocks, totfiles
            elif 'error' in row:
                raise Exception(row.get('reason', row['error']))
        # if we're here we didn't find a dataset, throw the error
        msg = 'empty set'
        raise Exception(msg)
    else:
        # we call filesummaries?dataset=dataset to get number of files/blks
        dbs_url += '/filesummaries'
        dbs_args = {'dataset': dataset}
        headers = {'Accept': 'application/json;text/json'}
        source, expire = \
            getdata(dbs_url, dbs_args, headers, expire, ckey=CKEY, cert=CERT,
                    system='dbs3')
        for row in json_parser(source, None):
            totfiles  = row[0]['num_file']
            totblocks = row[0]['num_block']
            return totblocks, totfiles
Example #21
def get_ids(url, params, dataset, verbose=False):
    "Query either ReqMgr2 or WMStats and retrieve request ids"
    headers = {'Accept': 'application/json;text/json'}
    expire = 600  # dummy number, we don't need it here
    ids = []
    source, expire = \
        getdata(url, params, headers, expire, ckey=CKEY, cert=CERT,
                verbose=verbose)
    for row in json_parser(source, None):
        for rec in row.get('rows', []):
            doc = rec['doc']
            found = 0
            if not doc:
                continue
            for key in doc.keys():
                if key.endswith("ConfigCacheID"):
                    ids.append(doc[key])
                    found += 1
            if not found:
                if 'id' in rec and 'key' in rec and rec['key'] == dataset:
                    if rec['id']:
                        ids.append(rec['id'])
    return ids
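
The params below mirror those built by findReqMgrIds in Example #1 (a couch view keyed by output dataset); the base URL and dataset path are illustrative:

base = 'https://cmsweb.cern.ch'
url = '%s/couchdb/reqmgr_workload_cache/_design/ReqMgr/_view/byoutputdataset' % base
dataset = '/A/B-C-v1/GEN-SIM'  # placeholder
params = {'key': '"%s"' % dataset, 'include_docs': 'true'}
print(get_ids(url, params, dataset))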
Example #22
 def parser(self, dasquery, dformat, data, api):
     "DAS parser for MCM data-service"
     prim_key = self.dasmapping.primary_key(self.name, api)
     gen = json_parser(data, self.logger)
     counter = 0
     for rec in gen:
         if "results" in rec:
             row = rec["results"]
         else:
             row = rec
         for key in ["_id", "_rev"]:
             if key in row:
                 del row[key]
         if row:
             if api == "dataset4mcm":
                 for val in row.values():
                     yield {"dataset": {"name": val}}
             else:
                 yield {"mcm": row}
         counter += 1
     msg = "api=%s, format=%s " % (api, dformat)
     msg += "prim_key=%s yield %s rows" % (prim_key, counter)
     self.logger.info(msg)
Example #23
def dbs_dataset4site_release(dbs_url, getdata, release):
    expire = 600 # set some expire since we're not going to use it
    if  which_dbs(dbs_url) == 'dbs2':
        # DBS2 branch; in DBS3 we use the datasets API and pass the release there
        query = 'find dataset where release=%s' % release
        dbs_args = {'api':'executeQuery', 'apiversion': 'DBS_2_0_9', \
                    'query':query}
        headers = {'Accept': 'text/xml'}
        source, expire = getdata(dbs_url, dbs_args, expire, headers)
        prim_key = 'dataset'
        datasets = set()
        for row in qlxml_parser(source, prim_key):
            dataset = row['dataset']['dataset']
            yield dataset
    else:
        # we call datasets?release=release to get list of datasets
        dbs_args = \
        {'release_version': release, 'dataset_access_type':'PRODUCTION'}
        headers = {'Accept': 'application/json;text/json'}
        source, expire = getdata(dbs_url, dbs_args, expire, headers)
        for rec in json_parser(source, None):
            for row in rec:
                yield row['dataset']
Example #24
def get_ids(url, params, dataset, verbose=False):
    "Query either ReqMgr or WMStats and retrieve request ids"
    headers = {'Accept': 'application/json;text/json'}
    expire = 600  # dummy number, we don't need it here
    ids = []
    source, expire = \
        getdata(url, params, headers, expire, ckey=CKEY, cert=CERT,
                verbose=verbose)
    for row in json_parser(source, None):
        for rec in row.get('rows', []):
            doc = rec['doc']
            if not doc:
                continue
            if 'ProcConfigCacheID' in doc:
                ids.append(doc['ProcConfigCacheID'])
            elif 'ConfigCacheID' in doc:
                ids.append(doc['ConfigCacheID'])
            elif 'SkimConfigCacheID' in doc:
                ids.append(doc['SkimConfigCacheID'])
            else:
                if 'id' in rec and 'key' in rec and rec['key'] == dataset:
                    ids.append(rec['id'])
    return ids
Example #25
def get_ids(url, params, dataset, verbose=False):
    "Query either ReqMgr or WMStats and retrieve request ids"
    headers = {'Accept': 'application/json;text/json'}
    expire = 600 # dummy number, we don't need it here
    ids = []
    source, expire = \
        getdata(url, params, headers, expire, ckey=CKEY, cert=CERT,
                verbose=verbose)
    for row in json_parser(source, None):
        for rec in row.get('rows', []):
            doc = rec['doc']
            if  not doc:
                continue
            if  'ProcConfigCacheID' in doc:
                ids.append(doc['ProcConfigCacheID'])
            elif 'ConfigCacheID' in doc:
                ids.append(doc['ConfigCacheID'])
            elif 'SkimConfigCacheID' in doc:
                ids.append(doc['SkimConfigCacheID'])
            else:
                if  'id' in rec and 'key' in rec and rec['key'] == dataset:
                    ids.append(rec['id'])
    return ids
Example #26
 def parser_helper(self, query, dformat, source, api):
     """
     DBS3 data-service parser helper, it is used by parser method.
     """
     if api == "site4dataset":
         gen = json_parser(source, self.logger)
     else:
         gen = DASAbstractService.parser(self, query, dformat, source, api)
     if api == "site4dataset":
         sites = set()
         for rec in gen:
             if isinstance(rec, list):
                 for row in rec:
                     orig_site = row["origin_site_name"]
                     if orig_site not in sites:
                         sites.add(orig_site)
             else:
                 orig_site = rec.get("origin_site_name", None)
                 if orig_site and orig_site not in sites:
                     sites.add(orig_site)
         for site in sites:
             yield {"site": {"name": site}}
     elif api == "datasets" or api == "dataset_info":
         for row in gen:
             row["name"] = row["dataset"]
             del row["dataset"]
             yield {"dataset": row}
     elif api == "filesummaries":
         name = query.mongo_query["spec"]["dataset.name"]
         for row in gen:
             row["dataset"]["name"] = name
             yield row
     elif api == "summary4dataset_run" or api == "summary4block_run":
         spec = query.mongo_query.get("spec", {})
         dataset = spec.get("dataset.name", "")
         block = spec.get("block.name", "")
         run = spec.get("run.run_number", 0)
         if isinstance(run, dict):  # we got a run range
             if "$in" in run:
                 run = run["$in"]
             elif "$lte" in run:
                 run = range(run["$gte"], run["$lte"])
         for row in gen:
             if run:
                 row.update({"run": run})
             if dataset:
                 row.update({"dataset": dataset})
             if block:
                 row.update({"block": block})
             yield row
     elif api == "blockorigin":
         for row in gen:
             yield row
     elif api == "blockparents":
         for row in gen:
             try:
                 del row["parent"]["this_block_name"]
             except:
                 pass
             yield row
     elif api == "fileparents":
         for row in gen:
             parent = row["parent"]
             for val in parent["parent_logical_file_name"]:
                 yield dict(name=val)
     elif api == "runs_via_dataset" or api == "runs":
         for row in gen:
             values = row["run"]["run_num"]
             if isinstance(values, list):
                 for val in values:
                     yield dict(run_number=val)
             else:
                 yield dict(run_number=values)
     elif api == "filechildren":
         for row in gen:
             parent = row["child"]
             for val in parent["child_logical_file_name"]:
                 yield dict(name=val)
     elif api == "files" or api == "files_via_dataset" or api == "files_via_block":
         status = "VALID"
         for row in gen:
             if "spec" in query.mongo_query:
                 if "status.name" in query.mongo_query["spec"]:
                     status = query.mongo_query["spec"]["status.name"]
             file_status = row["file"]["is_file_valid"]
             if status == "INVALID":  # filter out valid files
                 if int(file_status) == 1:  # valid status
                     row = None
             else:  # filter out invalid files
                 if int(file_status) == 0:  # invalid status
                     row = None
             if row:
                 yield row
     elif api == "filelumis" or api == "filelumis4block":
         for row in gen:
             if "lumi" in row:
                 if "lumi_section_num" in row["lumi"]:
                     val = row["lumi"]["lumi_section_num"]
                     row["lumi"]["lumi_section_num"] = convert2ranges(val)
                 yield row
             else:
                 yield row
     else:
         for row in gen:
             yield row
Example #27
 def parser_helper(self, query, dformat, source, api):
     """
     DBS3 data-service parser helper, it is used by parser method.
     """
     if  api in ['site4dataset', 'site4block']:
         gen = json_parser(source, self.logger)
     else:
         gen = DASAbstractService.parser(self, query, dformat, source, api)
     if  api in ['site4dataset', 'site4block']:
         sites = set()
         for rec in gen:
             if  isinstance(rec, list):
                 for row in rec:
                     orig_site = row['origin_site_name']
                     if  orig_site not in sites:
                         sites.add(orig_site)
             else:
                 orig_site = rec.get('origin_site_name', None)
                 if  orig_site and orig_site not in sites:
                     sites.add(orig_site)
         for site in sites:
             yield {'site': {'name': site}}
     elif api == 'datasets' or api == 'dataset_info' or api == 'datasetlist':
         for row in gen:
             row['name'] = row['dataset']
             del row['dataset']
             yield {'dataset':row}
     elif api == 'filesummaries':
         name = query.mongo_query['spec']['dataset.name']
         for row in gen:
             row['dataset']['name'] = name
             yield row
     elif api == 'summary4dataset_run' or api == 'summary4block_run':
         spec = query.mongo_query.get('spec', {})
         dataset = spec.get('dataset.name', '')
         block = spec.get('block.name', '')
         run = spec.get('run.run_number', 0)
         if  isinstance(run, dict): # we got a run range
             if  '$in' in run:
                 run = run['$in']
             elif '$lte' in run:
                  run = range(run['$gte'], run['$lte'] + 1) # $lte bound is inclusive
         for row in gen:
             if  run:
                 row.update({"run": run})
             if  dataset:
                 row.update({"dataset": dataset})
             if  block:
                 row.update({"block": block})
             yield row
     elif api == 'releaseversions':
         for row in gen:
             values = row['release']['release_version']
             for val in values:
                 yield dict(release=dict(name=val))
     elif api == 'datasetaccesstypes':
         for row in gen:
             values = row['status']['dataset_access_type']
             for val in values:
                 yield dict(status=dict(name=val))
     elif api == 'blockorigin':
         for row in gen:
             yield row
     elif api == 'blockparents':
         for row in gen:
             try:
                 del row['parent']['this_block_name']
             except:
                 pass
             yield row
     elif api == 'fileparents':
         for row in gen:
             parent = row['parent']
             for val in parent['parent_logical_file_name']:
                 yield dict(name=val)
     elif api == 'runs_via_dataset' or api == 'runs':
         for row in gen:
             values = row.get('run', {}).get('run_num', 'N/A')
             if  isinstance(values, list):
                 for val in values:
                     yield dict(run_number=val)
             else:
                 yield dict(run_number=values)
     elif api == 'filechildren':
         for row in gen:
             parent = row['child']
             for val in parent['child_logical_file_name']:
                 yield dict(name=val)
     elif api == 'files' or api == 'files_via_dataset' or \
         api == 'files_via_block':
         status = 'VALID'
         for row in gen:
             if  'spec' in query.mongo_query:
                 if  'status.name' in query.mongo_query['spec']:
                     status = query.mongo_query['spec']['status.name']
             try:
                 file_status = row['file']['is_file_valid']
             except KeyError:
                 file_status = 0 # file status is unknown
             if  status == '*': # any file
                 pass
             elif  status == 'INVALID': # filter out valid files
                 if  int(file_status) == 1:# valid status
                     row = None
             else: # filter out invalid files
                 if  int(file_status) == 0:# invalid status
                     row = None
             if  row:
                 yield row
     elif api == 'filelumis' or api == 'filelumis4block':
         for row in gen:
             if  'lumi' in row:
                 if  'lumi_section_num' in row['lumi']:
                     val = row['lumi']['lumi_section_num']
                     row['lumi']['lumi_section_num'] = convert2ranges(val)
                 yield row
             else:
                 yield row
     else:
         for row in gen:
             yield row
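
The filelumis branches above call convert2ranges, which is not shown in these examples. A self-contained sketch of one plausible contract, collapsing lumi section numbers into inclusive [start, end] pairs (an assumption, not the DAS original):

def convert2ranges(values):
    "Collapse a list of ints into sorted, inclusive [start, end] ranges"
    ranges = []
    for val in sorted(set(values)):
        if ranges and val == ranges[-1][1] + 1:
            ranges[-1][1] = val          # extend the current range
        else:
            ranges.append([val, val])    # start a new range
    return ranges

# convert2ranges([1, 2, 3, 7, 9, 10]) -> [[1, 3], [7, 7], [9, 10]]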