Example #1
def dbs_dataset4site_release(dbs_url, release):
    "Get dataset for given site and release"
    expire = 600 # set some expire since we're not going to use it
    if  which_dbs(dbs_url) == 'dbs':
        # in DBS3 I'll use datasets API and pass release over there
        query = 'find dataset where release=%s' % release
        dbs_args = {'api':'executeQuery', 'apiversion': 'DBS_2_0_9', \
                    'query':query}
        headers = {'Accept': 'text/xml'}
        source, expire = \
            getdata(dbs_url, dbs_args, headers, expire, ckey=CKEY, cert=CERT,
                    system='dbs')
        prim_key = 'dataset'
        for row in qlxml_parser(source, prim_key):
            if  'dataset' in row:
                dataset = row['dataset']['dataset']
                yield dataset
            elif 'error' in row:
                err = row.get('reason', None)
                err = err if err else row['error']
                yield 'DBS error: %s' % err
    else:
        # we call datasets?release=release to get list of datasets
        dbs_url += '/datasets'
        dbs_args = \
        {'release_version': release, 'dataset_access_type':'VALID'}
        headers = {'Accept': 'application/json;text/json'}
        source, expire = \
            getdata(dbs_url, dbs_args, headers, expire, ckey=CKEY, cert=CERT,
                    system='dbs3')
        for rec in json_parser(source, None):
            for row in rec:
                yield row['dataset']
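A minimal usage sketch for the generator above; the DBS URL and release string here are placeholders, not values taken from the project:

# hypothetical values, for illustration only
dbs_url = 'https://cmsweb.cern.ch/dbs/prod/global/DBSReader'
for item in dbs_dataset4site_release(dbs_url, 'CMSSW_5_3_8'):
    # the generator yields dataset paths, or 'DBS error: ...' strings on failure
    print(item)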
Example #2
def datasets_dbs2(urls, verbose=0):
    """DBS2 implementation of datasets function"""
    headers = {'Accept':'application/xml;text/xml'}
    records = []
    url     = urls.get('dbs')
    query   = 'find dataset,dataset.tier,dataset.era where dataset.status like VALID*'
    params  = {'api':'executeQuery', 'apiversion':'DBS_2_0_9', 'query':query}
    stream, _ = getdata(url, params, headers, verbose=verbose)
    records = [r for r in qlxml_parser(stream, 'dataset')]
    stream.close()
    data = {}
    size = 10 # size for POST request to Phedex
    for row in records:
        dataset = row['dataset']
        if  not data.has_key(dataset['dataset']):
            data[dataset['dataset']] = \
            dict(era=dataset['dataset.era'], tier=dataset['dataset.tier'])
        if  len(data.keys()) > size:
            for rec in dataset_info(urls, data):
                yield rec
            data = {}
    if  data:
        for rec in dataset_info(urls, data):
            yield rec
    del records
Example #3
def dataset_summary(dbs_url, getdata, dataset):
    """
    Invoke DBS2/DBS3 call to get information about total
    number of files/blocks in a given dataset.
    """
    expire = 600 # set some expire since we're not going to use it
    if  which_dbs(dbs_url) == 'dbs2':
        # DBS2 call
        query = 'find count(file.name), count(block.name) where dataset=%s'\
                 % dataset
        dbs_args = {'api':'executeQuery', 'apiversion': 'DBS_2_0_9', \
                    'query':query}
        headers = {'Accept': 'text/xml'}
        source, expire = getdata(dbs_url, dbs_args, expire, headers)
        prim_key = 'dataset'
        datasets = set()
        for row in qlxml_parser(source, prim_key):
            totfiles  = row['dataset']['count_file.name']
            totblocks = row['dataset']['count_block.name']
            return totblocks, totfiles
    else:
        # we call filesummaries?dataset=dataset to get number of files/blks
        dbs_args = {'dataset': dataset}
        headers = {'Accept': 'application/json;text/json'}
        source, expire = getdata(dbs_url, dbs_args, expire, headers)
        for row in json_parser(source, None):
            totfiles  = row[0]['num_file']
            totblocks = row[0]['num_block']
            return totblocks, totfiles
Example #4
File: utils_t.py Project: ktf/DAS
    def test_xml_parser_4(self):
        """
        Test functionality of xml_parser
        """
        xmldata = """<?xml version='1.0' encoding='ISO-8859-1'?>
<results>
<row>
  <name>/c1.root</name>
  <size>1</size>
</row>
<row>
  <name>/c2.root</name>
  <size>2</size>
</row>
</results>
"""
        fdesc  = tempfile.NamedTemporaryFile()
        fname  = fdesc.name
        stream = file(fname, 'w')
        stream.write(xmldata)
        stream.close()
        stream = file(fname, 'r')
        gen    = qlxml_parser(stream, "file")
        result = [r for r in gen]
        expect = [{'file': {'name': '/c1.root', 'size': 1}}, 
                  {'file': {'name': '/c2.root', 'size': 2}}]
        self.assertEqual(expect, result)
Example #5
def dbs_find(entity, url, kwds):
    "Find files for given set of parameters"
    if  entity not in ['run', 'file', 'block']:
        msg = 'Unsupported entity key=%s' % entity
        raise Exception(msg)
    expire  = 600
    dataset = kwds.get('dataset', None)
    block   = kwds.get('block', None)
    lfn     = kwds.get('lfn', None)
    runs    = kwds.get('runs', [])
    if  not (dataset or block or lfn):
        return
    query = 'find %s' % entity
    if  dataset:
        query += ' where dataset=%s' % dataset
    elif block:
        query += ' where block=%s' % block
    elif lfn:
        query += ' where file=%s' % lfn
    if  runs:
        rcond   = ' or '.join(['run=%s' % r for r in runs])
        query  += ' and (%s)' % rcond
    params  = {'api':'executeQuery', 'apiversion':'DBS_2_0_9', 'query':query}
    headers = {'Accept': 'text/xml'}
    source, expire = \
        getdata(url, params, headers, expire, ckey=CKEY, cert=CERT)
    pkey    = entity
    for row in qlxml_parser(source, pkey):
        val = row[entity][entity]
        yield val
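A rough illustration of calling dbs_find; the URL, dataset path and run numbers are invented, and CKEY/CERT are assumed to be defined globally as in the snippet:

kwds = {'dataset': '/Primary/Processed/TIER', 'runs': [160955, 160956]}
for lfn in dbs_find('file', 'https://example.cern.ch/dbs', kwds):
    print(lfn)  # one logical file name per matching row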
Example #6
 def datasets_dbs(self):
     """
     Retrieve a list of DBS datasets (DBS2)
     """
     query = "find dataset,dataset.status"
     params = {"api": "executeQuery", "apiversion": "DBS_2_0_9", "query": query}
     encoded_data = urllib.urlencode(params, doseq=True)
     url = self.dbs_url + "?" + encoded_data
     req = urllib2.Request(url)
     try:
         stream = urllib2.urlopen(req)
     except urllib2.HTTPError:
         msg = "Fail to contact %s" % url
         print dastimestamp("DAS ERROR"), msg
         raise Exception(msg)
     except Exception as exc:
         print_exc(exc)
         msg = "Fail to contact %s" % url
         print dastimestamp("DAS ERROR"), msg
         raise Exception(msg)
     gen = qlxml_parser(stream, "dataset")
     for row in gen:
         dataset = row["dataset"]["dataset"]
         rec = {"dataset": dataset}
         if self.write_hash:
             storage_query = {
                 "fields": ["dataset"],
                 "spec": [{"key": "dataset.name", "value": '"%s"' % dataset}],
                 "instance": self.dbcoll,
             }
             rec.update({"qhash": genkey(storage_query)})
         if row["dataset"]["dataset.status"] == "VALID":
             yield rec
     stream.close()
Example #7
File: dbs_rr.py Project: ktf/DAS
def runs_dbs2(url, dataset, ckey, cert):
    "Retrive list of run from DBS2 for a given dataset"
    query    = "find run where dataset=%s" % dataset
    params   = dict(api='executeQuery', apiversion='DBS_2_0_9', query=query)
    data, _  = getdata(url, params, ckey=ckey, cert=cert, system='combined')
    prim_key = 'run'
    for row in qlxml_parser(data, prim_key):
        run  = row['run']['run']
        yield run
Example #8
def run_lumis_dbs2(url, dataset, ckey, cert):
    "Retrive list of run/lumis from DBS2 for a given dataset"
    query    = "find run, lumi where dataset=%s" % dataset
    params   = dict(api='executeQuery', apiversion='DBS_2_0_9', query=query)
    data, _  = getdata(url, params, ckey=ckey, cert=cert, system='combined')
    prim_key = 'run'
    res = {} # output result
    for row in qlxml_parser(data, prim_key):
        run  = row['run']['run']
        lumi = row['run']['lumi']
        res.setdefault(run, []).append(lumi)
    return res
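The returned dictionary maps each run number to its list of lumi sections; a hypothetical consumer (run numbers and dataset path invented):

res = run_lumis_dbs2(url, '/Primary/Processed/TIER', ckey, cert)
# res might look like {160955: [1, 2, 3], 160956: [10, 11]}
for run in sorted(res):
    print(run, sorted(res[run]))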
Example #9
def datasets_dbs2(urls, verbose=0):
    """DBS2 implementation of datasets function"""
    headers = {'Accept':'application/xml;text/xml'}
    records = []
    url     = urls.get('dbs')
    query   = \
        'find dataset,dataset.tier,dataset.era where dataset.status like VALID*'
    params  = {'api':'executeQuery', 'apiversion':'DBS_2_0_9', 'query':query}
    stream, _ = getdata(url, params, headers, post=False, \
            ckey=CKEY, cert=CERT, verbose=verbose, system='dbs')
    records = [r for r in qlxml_parser(stream, 'dataset')]
    stream.close()
    dbsdata = {}
    for row in records:
        dataset = row['dataset']
        if  dataset['dataset'] not in dbsdata:
            dbsdata[dataset['dataset']] = \
                dict(era=dataset['dataset.era'],
                        tier=dataset['dataset.tier'], status='VALID')
    for row in phedex_info(urls, dbsdata):
        yield row
Example #10
 def datasets_dbs(self):
     """
     Retrieve a list of DBS datasets (DBS2)
     """
     query = 'find dataset,dataset.status'
     params = {'api': 'executeQuery', 'apiversion': 'DBS_2_0_9',
               'query':query}
     encoded_data = urllib.urlencode(params, doseq=True)
     url = self.dbs_url + '?' + encoded_data
     req = urllib2.Request(url)
     try:
         stream = urllib2.urlopen(req)
     except Exception as exc:
         print_exc(exc)
         msg = 'Fail to contact %s' % url
         raise Exception(msg)
     gen = qlxml_parser(stream, 'dataset')
     for row in gen:
         if  row['dataset']['dataset.status'] == 'VALID':
             yield dict(dataset=row['dataset']['dataset'])
     stream.close()
Example #11
def block_run_lumis(url, blocks, runs=None):
    """
    Find block, run, lumi tuples for a given set of blocks and (optional) runs.
    """
    headers = {'Accept': 'text/xml'}
    urls = []
    for blk in blocks:
        if  not blk:
            continue
        query   = 'find block,run,lumi where block=%s' % blk
        if  runs and isinstance(runs, list):
            val = ' or '.join(['run=%s' % r for r in runs])
            query += ' and (%s)' % val
        params  = {'api':'executeQuery', 'apiversion':'DBS_2_0_9',
                   'query':query}
        dbs_url = url + '?' + urllib.urlencode(params)
        urls.append(dbs_url)
    if  not urls:
        return
    gen = urlfetch_getdata(urls, CKEY, CERT, headers)
    prim_key = 'row'
    odict = {} # output dict
    for rec in gen:
        if  'error' in rec:
            error  = rec.get('error')
            reason = rec.get('reason', '')
            print dastimestamp('DAS ERROR'), error, reason
            yield {'error': error, 'reason': reason}
        else:
            source   = StringIO.StringIO(rec['data'])
            lumis    = []
            for row in qlxml_parser(source, prim_key):
                run  = row['row']['run']
                blk  = row['row']['block']
                lumi = row['row']['lumi']
                key  = (blk, run)
                odict.setdefault(key, []).append(lumi)
    for key, lumis in odict.iteritems():
        blk, run = key
        yield blk, run, lumis
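A sketch of consuming the generator above; the block name and run number are invented placeholders:

blocks = ['/Primary/Processed/TIER#abcd-1234']
for item in block_run_lumis(url, blocks, runs=[160955]):
    if isinstance(item, dict):      # error records carry 'error'/'reason' keys
        print(item['error'], item['reason'])
    else:
        blk, run, lumis = item      # regular records are (block, run, [lumi, ...]) tuples
        print(blk, run, len(lumis))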
Example #12
File: utils_t.py Project: ktf/DAS
    def test_xml_parser_3(self):
        """
        Test functionality of xml_parser
        """
        xmldata = """<?xml version='1.0' encoding='ISO-8859-1'?>
<results>
<row>
  <dataset>/a/b/c</dataset>
  <nblocks>25</nblocks>
</row>
</results>
"""
        fdesc  = tempfile.NamedTemporaryFile()
        fname  = fdesc.name
        stream = file(fname, 'w')
        stream.write(xmldata)
        stream.close()
        stream = file(fname, 'r')
        gen    = qlxml_parser(stream, "dataset")
        result = gen.next()
        expect = {'dataset': {'dataset':'/a/b/c', 'nblocks': 25}}
        self.assertEqual(expect, result)
Example #13
    def test_xml_parser_3(self):
        """
        Test functionality of xml_parser
        """
        xmldata = """<?xml version='1.0' encoding='ISO-8859-1'?>
<results>
<row>
  <dataset>/a/b/c</dataset>
  <nblocks>25</nblocks>
</row>
</results>
"""
        fdesc = tempfile.NamedTemporaryFile()
        fname = fdesc.name
        stream = open(fname, 'w')
        stream.write(xmldata)
        stream.close()
        stream = open(fname, 'r')
        gen = qlxml_parser(stream, "dataset")
        result = next(gen)
        expect = {'dataset': {'dataset': '/a/b/c', 'nblocks': 25}}
        self.assertEqual(expect, result)
Example #14
def dataset_summary(dbs_url, dataset):
    """
    Invoke DBS2/DBS3 call to get information about total
    number of files/blocks in a given dataset.
    """
    expire = 600 # set some expire since we're not going to use it
    if  which_dbs(dbs_url) == 'dbs':
        # DBS2 call
        query  = 'find count(file.name), count(block.name)'
        query += ' where dataset=%s and dataset.status=*' % dataset
        dbs_args = {'api':'executeQuery', 'apiversion': 'DBS_2_0_9', \
                    'query':query}
        headers = {'Accept': 'text/xml'}
        source, expire = \
            getdata(dbs_url, dbs_args, headers, expire, ckey=CKEY, cert=CERT,
                    system='dbs')
        prim_key = 'dataset'
        for row in qlxml_parser(source, prim_key):
            if  'dataset' in row:
                totfiles  = row['dataset']['count_file.name']
                totblocks = row['dataset']['count_block.name']
                return totblocks, totfiles
            elif 'error' in row:
                raise Exception(row.get('reason', row['error']))
        # if we're here we didn't find a dataset, raise an error
        msg = 'empty set'
        raise Exception(msg)
    else:
        # we call filesummaries?dataset=dataset to get number of files/blks
        dbs_url += '/filesummaries'
        dbs_args = {'dataset': dataset}
        headers = {'Accept': 'application/json;text/json'}
        source, expire = \
            getdata(dbs_url, dbs_args, headers, expire, ckey=CKEY, cert=CERT,
                    system='dbs3')
        for row in json_parser(source, None):
            totfiles  = row[0]['num_file']
            totblocks = row[0]['num_block']
            return totblocks, totfiles
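A hypothetical call of the function above; the dataset path is only an example:

totblocks, totfiles = dataset_summary(dbs_url, '/Primary/Processed/TIER')
print('blocks=%s files=%s' % (totblocks, totfiles))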
Example #15
def dbs_dataset4site_release(dbs_url, getdata, release):
    expire = 600 # set some expire since we're not going to use it
    if  which_dbs(dbs_url) == 'dbs2':
        # in DBS3 I'll use datasets API and pass release over there
        query = 'find dataset where release=%s' % release
        dbs_args = {'api':'executeQuery', 'apiversion': 'DBS_2_0_9', \
                    'query':query}
        headers = {'Accept': 'text/xml'}
        source, expire = getdata(dbs_url, dbs_args, expire, headers)
        prim_key = 'dataset'
        datasets = set()
        for row in qlxml_parser(source, prim_key):
            dataset = row['dataset']['dataset']
            yield dataset
    else:
        # we call datasets?release=release to get list of datasets
        dbs_args = \
        {'release_version': release, 'dataset_access_type':'PRODUCTION'}
        headers = {'Accept': 'application/json;text/json'}
        source, expire = getdata(dbs_url, dbs_args, expire, headers)
        for rec in json_parser(source, None):
            for row in rec:
                yield row['dataset']
Example #16
    def test_xml_parser_4(self):
        """
        Test functionality of xml_parser
        """
        xmldata = """<?xml version='1.0' encoding='ISO-8859-1'?>
<results>
<row>
  <name>/c1.root</name>
  <size>1</size>
</row>
<row>
  <name>/c2.root</name>
  <size>2</size>
</row>
</results>
"""
        fdesc = tempfile.NamedTemporaryFile()
        fname = fdesc.name
        stream = open(fname, 'w')
        stream.write(xmldata)
        stream.close()
        stream = open(fname, 'r')
        gen = qlxml_parser(stream, "file")
        result = [r for r in gen]
        expect = [{
            'file': {
                'name': '/c1.root',
                'size': 1
            }
        }, {
            'file': {
                'name': '/c2.root',
                'size': 2
            }
        }]
        self.assertEqual(expect, result)
Example #17
File: utils_t.py Project: ktf/DAS
    def test_xml_parser_5(self):
        """
        Test functionality of xml_parser
        """
        xmldata = """<?xml version='1.0' standalone='yes'?>
<!-- DBS Version 1 -->
<dbs>
 <userinput>
  <input>find dataset where  tier=*GEN* and primds=ZJetToEE_Pt*
  </input>
  <timeStamp>Mon Feb 07 19:51:59 CET 2011</timeStamp>
 </userinput>
 <java_query> 
  <sql>GROUP BY  PATH</sql>
  <bp>%GEN%</bp>
  <bp>ZJetToEE_Pt%</bp>
 </java_query>
 <python_query>
  <sql>SELECT  PATH AS PATH,</sql>
  <bindparams><p0>%GEN%</p0>
   <p1>ZJetToEE_Pt%</p1>
  </bindparams>
 </python_query>
 <count_query>
  <sql> SELECT COUNT(*) AS CNT FROM </sql>
  <bindparams> <p0>%GEN%</p0>
   <p1>ZJetToEE_Pt%</p1>
  </bindparams>
 </count_query>
 <results>"""
 
        suffix = """ </results>
<SUCCESS/>
</dbs>
"""
        row = """<row>
  <dataset>/ZJetToEE_Pt_80to120_TuneZ2_7TeV_pythia6/</dataset>
  <sum_block.numfiles>%d</sum_block.numfiles>
  <sum_block.numevents>110000</sum_block.numevents>
  <count_block>1</count_block>
  <sum_block.size>61942523513</sum_block.size>
</row>"""
        for i in range(200):
            xmldata = xmldata + row % i
        xmldata = xmldata + suffix  
        fdesc  = tempfile.NamedTemporaryFile()
        fname  = fdesc.name
        stream = file(fname, 'w')
        stream.write(xmldata)
        stream.close()
        stream = file(fname, 'r')
        gen    = qlxml_parser(stream, "dataset")
        expect = {'dataset': {'sum_block.numfiles': 12, 
                              'count_block': 1, 
                              'sum_block.numevents': 110000, 
                              'sum_block.size': 61942523513, 
                  'dataset': '/ZJetToEE_Pt_80to120_TuneZ2_7TeV_pythia6/'}}
        count = 0
        for r in gen:
            expect['dataset']['sum_block.numfiles'] = count
            self.assertEqual(expect, r)
            count = count + 1
        self.assertEqual(200, count)
Example #18
    def test_xml_parser_5(self):
        """
        Test functionality of xml_parser
        """
        xmldata = """<?xml version='1.0' standalone='yes'?>
<!-- DBS Version 1 -->
<dbs>
 <userinput>
  <input>find dataset where  tier=*GEN* and primds=ZJetToEE_Pt*
  </input>
  <timeStamp>Mon Feb 07 19:51:59 CET 2011</timeStamp>
 </userinput>
 <java_query> 
  <sql>GROUP BY  PATH</sql>
  <bp>%GEN%</bp>
  <bp>ZJetToEE_Pt%</bp>
 </java_query>
 <python_query>
  <sql>SELECT  PATH AS PATH,</sql>
  <bindparams><p0>%GEN%</p0>
   <p1>ZJetToEE_Pt%</p1>
  </bindparams>
 </python_query>
 <count_query>
  <sql> SELECT COUNT(*) AS CNT FROM </sql>
  <bindparams> <p0>%GEN%</p0>
   <p1>ZJetToEE_Pt%</p1>
  </bindparams>
 </count_query>
 <results>"""

        suffix = """ </results>
<SUCCESS/>
</dbs>
"""
        row = """<row>
  <dataset>/ZJetToEE_Pt_80to120_TuneZ2_7TeV_pythia6/</dataset>
  <sum_block.numfiles>%d</sum_block.numfiles>
  <sum_block.numevents>110000</sum_block.numevents>
  <count_block>1</count_block>
  <sum_block.size>61942523513</sum_block.size>
</row>"""
        for i in range(200):
            xmldata = xmldata + row % i
        xmldata = xmldata + suffix
        fdesc = tempfile.NamedTemporaryFile()
        fname = fdesc.name
        stream = open(fname, 'w')
        stream.write(xmldata)
        stream.close()
        stream = open(fname, 'r')
        gen = qlxml_parser(stream, "dataset")
        expect = {
            'dataset': {
                'sum_block.numfiles': 12,
                'count_block': 1,
                'sum_block.numevents': 110000,
                'sum_block.size': 61942523513,
                'dataset': '/ZJetToEE_Pt_80to120_TuneZ2_7TeV_pythia6/'
            }
        }
        count = 0
        for r in gen:
            expect['dataset']['sum_block.numfiles'] = count
            self.assertEqual(expect, r)
            count = count + 1
        self.assertEqual(200, count)
Example #19
 def parser(self, dasquery, dformat, source, api):
     """
     DBS data-service parser.
     """
     sitedb = SERVICES.get('sitedb2', None) # look-up SiteDB from global scope
     query = dasquery.mongo_query
     if  api == 'listBlocks':
         prim_key = 'block'
     elif api == 'listBlocks4path':
         api = 'listBlocks'
         prim_key = 'block'
     elif api == 'listBlockProvenance':
         prim_key = 'block'
     elif api == 'listBlockProvenance4child':
         prim_key = 'block'
     elif api == 'listFiles':
         prim_key = 'file'
     elif api == 'listLFNs':
         prim_key = 'file_lfn'
     elif api == 'listFileLumis':
         prim_key = 'file_lumi_section'
     elif api == 'listFileProcQuality':
         prim_key = 'file_proc_quality'
     elif api == 'listFileParents':
         prim_key = 'file_parent'
     elif api == 'listTiers':
         prim_key = 'data_tier'
     elif api == 'listDatasetParents':
         prim_key = 'processed_dataset_parent'
     elif api == 'listPrimaryDatasets':
         prim_key = 'primary_dataset'
     elif api == 'listProcessedDatasets':
         prim_key = 'processed_dataset'
     elif api == 'fakeReleases':
         prim_key = 'release'
     elif api == 'listRuns':
         prim_key = 'run'
     elif  api == 'fakeRelease4File':
         prim_key = 'release'
     elif  api == 'fakeRelease4Dataset':
         prim_key = 'release'
     elif  api == 'fakeGroup4Dataset':
         prim_key = 'group'
     elif  api == 'fakeConfig':
         prim_key = 'config'
     elif  api == 'fakeListDataset4Block':
         prim_key = 'dataset'
     elif  api == 'fakeListDataset4File':
         prim_key = 'dataset'
     elif  api == 'fakeListDatasetbyDate':
         prim_key = 'dataset'
     elif  api == 'fakeDatasetSummary':
         prim_key = 'dataset'
     elif  api == 'fakeDataset4Run':
         prim_key = 'dataset'
     elif  api == 'fakeRun4File':
         prim_key = 'run'
     elif  api == 'fakeRun4Run':
         prim_key = 'run'
     elif api == 'fakeChild4File':
         prim_key = 'child'
     elif api == 'fakeChild4Dataset':
         prim_key = 'child'
     elif api == 'fakeSite4Dataset':
         prim_key = 'site'
     elif api == 'fakeStatus':
         prim_key = 'status'
     elif api == 'fakeFiles4DatasetRunLumis':
         prim_key = 'file'
     elif api == 'fakeRun4Block':
         prim_key = 'run'
     elif api == 'fakeBlock4DatasetRun':
         prim_key = 'block'
     elif api == 'fakeSite4Dataset':
         prim_key = 'site'
     else:
         msg = 'DBSService::parser, unsupported %s API %s' \
             % (self.name, api)
         raise Exception(msg)
     if  api.find('fake') != -1:
         gen = qlxml_parser(source, prim_key)
     else:
         gen = xml_parser(source, prim_key)
     useless_run_atts = ['number_of_events', 'number_of_lumi_sections', \
             'id', 'total_luminosity', 'store_number', 'end_of_run', \
             'start_of_run']
     config_attrs = ['config.name', 'config.content', 'config.version', \
              'config.type', 'config.annotation', 'config.createdate', \
              'config.createby', 'config.moddate', 'config.modby']
     for row in gen:
         if  not row:
             continue
         if  row.has_key('status') and \
             row['status'].has_key('dataset.status'):
             row['status']['name'] = row['status']['dataset.status']
             del row['status']['dataset.status']
         if  row.has_key('file_lumi_section'):
             row['lumi'] = row['file_lumi_section']
             del row['file_lumi_section']
         if  row.has_key('algorithm'):
             del row['algorithm']['ps_content']
         if  row.has_key('processed_dataset') and \
             row['processed_dataset'].has_key('path'):
             if  isinstance(row['processed_dataset']['path'], dict) \
             and row['processed_dataset']['path'].has_key('dataset_path'):
                 path = row['processed_dataset']['path']['dataset_path']
                 del row['processed_dataset']['path']
                 row['processed_dataset']['name'] = path
         # case for fake apis
         # remove useless attribute from results
         if  row.has_key('dataset'):
             if  row['dataset'].has_key('count_file.size'):
                 del row['dataset']['count_file.size']
             if  row['dataset'].has_key('dataset'):
                 name = row['dataset']['dataset']
                 del row['dataset']['dataset']
                 row['dataset']['name'] = name
         if  row.has_key('child') and row['child'].has_key('dataset.child'):
             row['child']['name'] = row['child']['dataset.child']
             del row['child']['dataset.child']
         if  row.has_key('child') and row['child'].has_key('file.child'):
             row['child']['name'] = row['child']['file.child']
             del row['child']['file.child']
         if  row.has_key('block') and query.get('fields') == ['parent']:
             row['parent'] = row['block']
             del row['block']
         if  row.has_key('block') and query.get('fields') == ['child']:
             row['child'] = row['block']
             del row['block']
         if  row.has_key('run') and row['run'].has_key('run'):
             row['run']['run_number'] = row['run']['run']
             del row['run']['run']
         if  row.has_key('release') and row['release'].has_key('release'):
             row['release']['name'] = row['release']['release']
             del row['release']['release']
         if  row.has_key('site'):
             row['site']['se'] = row['site']['site']
             del row['site']['site']
         convert_dot(row, 'config', config_attrs)
         convert_dot(row, 'file', ['file.name'])
         convert_dot(row, 'block', ['block.name'])
         convert_dot(row, 'dataset', ['dataset.tag', 'dataset.status'])
         # remove DBS2 run attributes (to be consistent with DBS3 output)
         # and let people extract this info from CondDB/LumiDB.
         if  row.has_key('run'):
             for att in useless_run_atts:
                 try:
                     del row['run'][att]
                 except:
                     pass
         if  api == 'fakeSite4Dataset' and sitedb:
             site = row.get('site', None)
             if  site and isinstance(site, dict):
                 sename = site.get('se', None)
                 info = sitedb.site_info(sename)
                 if  info:
                     row['site'].update(info)
         yield row
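The long elif chain above is effectively a static lookup from API name to primary key; a more compact sketch of the same dispatch (only a few entries shown, taken from the chain):

PRIM_KEYS = {
    'listBlocks': 'block',
    'listFiles': 'file',
    'listRuns': 'run',
    'fakeDataset4Run': 'dataset',
    'fakeSite4Dataset': 'site',
}

def prim_key_for(api):
    "Return the primary key associated with a DBS API name"
    try:
        return PRIM_KEYS[api]
    except KeyError:
        raise Exception('unsupported API %s' % api)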
Example #20
def summary4dataset_run(url, kwds):
    "Helper function to deal with summary dataset=/a/b/c requests"
    urls = []
    cond = ''
    val = kwds.get('run', 'optional')
    if  val != 'optional':
        if  isinstance(val, dict):
            min_run = 0
            max_run = 0
            if  '$lte' in val:
                max_run = val['$lte']
            if  '$gte' in val:
                min_run = val['$gte']
            if  min_run and max_run:
                val = "run >=%s and run <= %s" % (min_run, max_run)
            elif '$in' in val:
                val = ' or '.join(['run=%s' % r for r in val['$in']])
                val = '(%s)' % val
        elif isinstance(val, int):
            val = "run=%d" % val
        cond += ' and %s' % val
    val = kwds.get('dataset', None)
    if  val and val != 'optional':
        cond += ' and dataset=%s' % val
    val = kwds.get('block', None)
    if  val and val != 'optional':
        cond += ' and block=%s' % val
    query = "find file, file.size, file.numevents where " + cond[4:]
    params  = {'api':'executeQuery', 'apiversion':'DBS_2_0_9', 'query':query}
    url1 = url + '?' + urllib.urlencode(params)
    urls.append(url1)
    query = "find run, count(lumi) where " + cond[4:]
    params  = {'api':'executeQuery', 'apiversion':'DBS_2_0_9', 'query':query}
    url2 = url + '?' + urllib.urlencode(params)
    urls.append(url2)
    headers = {'Accept': 'text/xml'}
    gen = urlfetch_getdata(urls, CKEY, CERT, headers)
    tot_size  = 0
    tot_evts  = 0
    tot_lumis = 0
    tot_files = 0
    for rec in gen:
        if  'error' in rec:
            error  = rec.get('error')
            reason = rec.get('reason', '')
            srec = {'summary':'', 'error':error, 'reason':reason}
            yield srec
        url = rec['url']
        data = rec['data']
        stream = StringIO.StringIO(data)
        if  url.find('file') != -1:
            prim_key = 'file'
        else:
            prim_key = 'run'
        for row in qlxml_parser(stream, prim_key):
            if  prim_key == 'file':
                fdata = row['file']
                tot_size  += fdata['file.size']
                tot_evts  += fdata['file.numevents']
                tot_files += 1
            else:
                fdata = row['run']
                tot_lumis += fdata['count_lumi']
    srec = {'summary': {'file_size':tot_size, 'nevents':tot_evts,
        'nlumis':tot_lumis, 'nfiles': tot_files}}
    yield srec
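The generator ends with a single aggregated record; an illustrative consumer (the dataset path is a placeholder):

for rec in summary4dataset_run(url, {'dataset': '/Primary/Processed/TIER'}):
    if rec.get('error'):
        print(rec['error'], rec['reason'])
    else:
        # e.g. {'file_size': ..., 'nevents': ..., 'nlumis': ..., 'nfiles': ...}
        print(rec['summary'])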