Example #1
    def test_xml_parser(self):
        """
        Test functionality of xml_parser
        """
        xmldata = """<?xml version='1.0' encoding='ISO-8859-1'?>
<phedex attr="a">
<block bytes="1">
<file size="10">
</file>
</block>
</phedex>
"""
        fdesc = tempfile.NamedTemporaryFile()
        fname = fdesc.name
        stream = open(fname, 'w')
        stream.write(xmldata)
        stream.close()
        stream = open(fname, 'r')
        gen = xml_parser(stream, "block", [])
        result = next(gen)
        expect = {'block': {'bytes': 1, 'file': {'size': 10}}}
        self.assertEqual(expect, result)

        stream = open(fname, 'r')
        gen = xml_parser(stream, "file", ["block.bytes"])
        result = next(gen)
        expect = {'file': {'block': {'bytes': 1}, 'size': 10}}
        self.assertEqual(expect, result)
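
A note on the stream argument: the StringIO usage in Example #3 suggests xml_parser accepts any file-like object, so the temporary file above looks like a convenience rather than a requirement. A minimal sketch of the same parse driven from an in-memory stream, under that assumption (xml_parser imported as in the surrounding examples):

    import io

    xmldata = """<?xml version='1.0' encoding='ISO-8859-1'?>
    <phedex attr="a">
    <block bytes="1">
    <file size="10">
    </file>
    </block>
    </phedex>
    """
    # prim_key="block" yields one dict per <block> element; the empty
    # tags list requests no extra context attributes
    gen = xml_parser(io.StringIO(xmldata), "block", [])
    print(next(gen))  # {'block': {'bytes': 1, 'file': {'size': 10}}}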
Example #2
def phedex_files(phedex_url, kwds):
    "Get file information from Phedex"
    params = dict(kwds)  # parameters to be sent to Phedex
    site = kwds.get('site', None)
    if site and phedex_node_pattern.match(site):
        if not site.endswith('*'):
            # append a wildcard so look-up also matches site names
            # w/o the _Buffer or _MSS suffix
            site += '*'
        params.update({'node': site})
        params.pop('site')
    elif site and se_pattern.match(site):
        params.update({'se': site})
        params.pop('site')
    else:
        return
    expire = 600  # set some expire value; it is not used downstream
    headers = {'Accept': 'text/xml'}
    source, expire = \
        getdata(phedex_url, params, headers, expire, ckey=CKEY, cert=CERT,
                system='phedex')
    tags = 'block.file.name'
    prim_key = 'block'
    for rec in xml_parser(source, prim_key, tags):
        ddict = DotDict(rec)
        files = ddict.get('block.file')
        if not isinstance(files, list):
            files = [files]
        for row in files:
            yield row['name']
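
A hypothetical driver for phedex_files; the endpoint URL and the argument values below are illustrative assumptions, not values taken from this code:

    # illustrative values only; point this at a real Phedex fileReplicas API
    url = 'https://cmsweb.cern.ch/phedex/datasvc/xml/prod/fileReplicas'
    kwds = {'dataset': '/Primary/Processed/TIER', 'site': 'T1_US_FNAL'}
    for lfn in phedex_files(url, kwds):
        print(lfn)  # logical file names found at the matching node(s)

Note that the generator silently yields nothing when the site value matches neither phedex_node_pattern nor se_pattern, since the function returns before issuing the request.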
Example #3
def files4site(phedex_url, files, site):
    "Find site for given files"

    params = {}
    if site and phedex_node_pattern.match(site):
        if not site.endswith('*'):
            # append a wildcard so look-up also matches site names
            # w/o the _Buffer or _MSS suffix
            site += '*'
        params.update({'node': site})
    elif site and se_pattern.match(site):
        params.update({'se': site})
    else:
        return
    sname = urllib.urlencode(params)
    urls = []
    for fname in files:
        url = '%s?lfn=%s&%s' % (phedex_url, fname, sname)
        urls.append(url)
    tags = 'block.replica.node'
    prim_key = 'block'
    gen = urlfetch_getdata(urls, CKEY, CERT)
    for rec in gen:
        if 'error' in rec:
            yield rec
        else:
            # convert record string into StringIO for xml_parser
            source = StringIO.StringIO(rec['data'])
            for row in xml_parser(source, prim_key, tags):
                fobj = row['block']['file']
                fname = fobj['name']
                replica = fobj['replica']
                for item in replica:
                    yield fname
Example #4
def site4dataset(dbs_url, phedex_api, args, expire):
    "Yield site information about given dataset"
    # DBS part
    dataset = args['dataset']
    try:
        totblocks, totfiles = dataset_summary(dbs_url, dataset)
    except Exception as err:
        error  = str(err)
        reason = "Can't find #block, #files info in DBS for dataset=%s" \
                % dataset
        yield {'site': {'error': error, 'reason': reason}}
        return
    # Phedex part
    phedex_args = {'dataset':args['dataset']}
    headers = {'Accept': 'text/xml'}
    source, expire = \
        getdata(phedex_api, phedex_args, headers, expire, post=True,
                system='phedex')
    prim_key = 'block'
    tags = 'block.replica.node'
    site_info = {}
    for rec in xml_parser(source, prim_key, tags):
        ddict = DotDict(rec)
        replicas = ddict.get('block.replica')
        if  not isinstance(replicas, list):
            replicas = [replicas]
        for row in replicas:
            if  not row or 'node' not in row:
                continue
            node = row['node']
            files = int(row['files'])
            complete = 1 if row['complete'] == 'y' else 0
            if  node in site_info:
                files = site_info[node]['files'] + files
                nblks  = site_info[node]['blocks'] + 1
                bc_val = site_info[node]['blocks_complete']
                b_complete = bc_val+1 if complete else bc_val
            else:
                b_complete = 1 if complete else 0
                nblks = 1
            site_info[node] = {'files': files, 'blocks': nblks,
                        'blocks_complete': b_complete}
    row = {}
    for key, val in site_info.items():
        if  totfiles:
            nfiles = '%5.2f%%' % (100*float(val['files'])/totfiles)
        else:
            nfiles = 'N/A'
        if  totblocks:
            nblks  = '%5.2f%%' % (100*float(val['blocks'])/totblocks)
        else:
            nblks = 'N/A'
        ratio = float(val['blocks_complete'])/val['blocks']
        b_completion = '%5.2f%%' % (100*ratio)
        row = {'site':{'name':key, 'dataset_fraction': nfiles,
            'block_fraction': nblks, 'block_completion': b_completion}}
        yield row
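
The DotDict used above is read only through dotted paths such as 'block.replica'. A minimal stand-in showing just that one behavior, assuming the real DAS DotDict walks nested dictionaries key by key:

    def dot_get(record, path):
        "Walk a nested dict along a dotted path, e.g. 'block.replica'"
        value = record
        for key in path.split('.'):
            if not isinstance(value, dict):
                return None
            value = value.get(key)
        return value

    rec = {'block': {'replica': {'node': 'T2_CH_CERN', 'files': '10'}}}
    print(dot_get(rec, 'block.replica'))  # {'node': 'T2_CH_CERN', 'files': '10'}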
Example #5
    def parser(self, dasquery, dformat, data, api):
        """
        DAS data parser. Input parameters:

        - *dasquery* input DAS query
        - *dformat* is a data format, e.g. XML, JSON
        - *data* is a data source, either file-like object or
          actual data
        - *api* is API name
        """
        prim_key  = self.dasmapping.primary_key(self.name, api)
        apitag    = self.dasmapping.apitag(self.name, api)
        counter   = 0
        if  dformat.lower() == 'xml':
            tags = self.dasmapping.api2daskey(self.name, api)
            gen  = xml_parser(data, prim_key, tags)
            for row in gen:
                counter += 1
                yield row
        elif dformat.lower() == 'json' or dformat.lower() == 'dasjson':
            gen  = json_parser(data, self.logger)
            das_dict = {}
            for row in gen:
                if  dformat.lower() == 'dasjson':
                    for key, val in row.items():
                        if  key != 'results':
                            das_dict[key] = val
                    row = row['results']
                    self.analytics.update_apicall(\
                        dasquery.mongo_query, das_dict)
                if  apitag and apitag in row:
                    row = row[apitag]
                if  isinstance(row, list):
                    for item in row:
                        if  prim_key in item:
                            counter += 1
                            yield item
                        else:
                            counter += 1
                            yield {prim_key:item}
                else:
                    if  prim_key in row:
                        counter += 1
                        yield row
                    else:
                        counter += 1
                        yield {prim_key:row}
        else:
            msg = 'Unsupported data format="%s", API="%s"' % (dformat, api)
            raise Exception(msg)
        msg  = "api=%s, format=%s " % (api, dformat)
        msg += "prim_key=%s yield %s rows" % (prim_key, counter)
        self.logger.info(msg)
Example #6
    def parser(self, dasquery, dformat, data, api):
        """
        DAS data parser. Input parameters:

        - *dasquery* input DAS query
        - *dformat* is a data format, e.g. XML, JSON
        - *data* is a data source, either file-like object or
          actual data
        - *api* is API name
        """
        prim_key = self.dasmapping.primary_key(self.name, api)
        counter = 0
        if dformat.lower() == 'xml':
            tags = self.dasmapping.api2daskey(self.name, api)
            gen = xml_parser(data, prim_key, tags)
            for row in gen:
                counter += 1
                yield row
        elif dformat.lower() == 'json' or dformat.lower() == 'dasjson':
            gen = json_parser(data, self.logger)
            das_dict = {}
            for row in gen:
                if dformat.lower() == 'dasjson':
                    for key, val in row.items():
                        if key != 'results':
                            das_dict[key] = val
                    row = row['results']
                if isinstance(row, list):
                    for item in row:
                        if item:
                            if prim_key in item:
                                counter += 1
                                yield item
                            else:
                                counter += 1
                                yield {prim_key: item}
                else:
                    if prim_key in row:
                        counter += 1
                        yield row
                    else:
                        counter += 1
                        yield {prim_key: row}
        else:
            msg = 'Unsupported data format="%s", API="%s"' % (dformat, api)
            raise Exception(msg)
        msg = "api=%s, format=%s " % (api, dformat)
        msg += "prim_key=%s yield %s rows" % (prim_key, counter)
        self.logger.info(msg)
Example #7
    def parser(self, dasquery, dformat, data, api):
        """
        DAS data parser. Input parameters:

        - *dasquery* input DAS query
        - *dformat* is a data format, e.g. XML, JSON
        - *data* is a data source, either file-like object or
          actual data
        - *api* is API name
        """
        prim_key = self.dasmapping.primary_key(self.name, api)
        counter = 0
        if dformat.lower() == "xml":
            tags = self.dasmapping.api2daskey(self.name, api)
            gen = xml_parser(data, prim_key, tags)
            for row in gen:
                counter += 1
                yield row
        elif dformat.lower() == "json" or dformat.lower() == "dasjson":
            gen = json_parser(data, self.logger)
            das_dict = {}
            for row in gen:
                if dformat.lower() == "dasjson":
                    for key, val in row.items():
                        if key != "results":
                            das_dict[key] = val
                    row = row["results"]
                if isinstance(row, list):
                    for item in row:
                        if prim_key in item:
                            counter += 1
                            yield item
                        else:
                            counter += 1
                            yield {prim_key: item}
                else:
                    if prim_key in row:
                        counter += 1
                        yield row
                    else:
                        counter += 1
                        yield {prim_key: row}
        else:
            msg = 'Unsupported data format="%s", API="%s"' % (dformat, api)
            raise Exception(msg)
        msg = "api=%s, format=%s " % (api, dformat)
        msg += "prim_key=%s yield %s rows" % (prim_key, counter)
        self.logger.info(msg)
Example #8
    def test_xml_parser_2(self):
        """
        Test functionality of xml_parser
        """
        xmldata = """<?xml version='1.0' encoding='ISO-8859-1'?>
<RUNS>
<RUN id="751084">
<LUMI>
<NUMBER>1</NUMBER>
<PROP>avx</PROP>
<TEST>
<FOO>1</FOO>
<BOO>2</BOO>
</TEST>
</LUMI>
</RUN>
</RUNS>
"""
        fdesc = tempfile.NamedTemporaryFile()
        fname = fdesc.name
        stream = open(fname, 'w')
        stream.write(xmldata)
        stream.close()
        stream = open(fname, 'r')
        gen = xml_parser(stream, "RUNS", [])
        result = next(gen)
        expect = {
            'RUNS': {
                'RUN': {
                    'id': 751084.0,
                    'LUMI': {
                        'TEST': {
                            'FOO': 1,
                            'BOO': 2
                        },
                        'NUMBER': 1,
                        'PROP': 'avx'
                    }
                }
            }
        }
        self.assertEqual(expect, result)
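
The expected output above shows that xml_parser coerces numeric-looking strings: the id attribute "751084" comes back as the number 751084.0 and the NUMBER element text as 1, while PROP stays the string 'avx'. A plausible coercion helper consistent with those values, as an illustration rather than the actual xml_parser internals; note that assertEqual cannot distinguish 751084 from 751084.0, since they compare equal in Python:

    def adjust_value(value):
        "Convert a numeric-looking XML string into int or float"
        try:
            return int(value)
        except ValueError:
            try:
                return float(value)
            except ValueError:
                return value

    print(adjust_value('751084'))  # 751084
    print(adjust_value('1'))       # 1
    print(adjust_value('avx'))     # 'avx'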
Example #9
 def parser(self, query, dformat, source, api):
     """
     Phedex data-service parser.
     """
     tags = []
     if  api == 'blockReplicas':
         prim_key = 'block'
     elif api == 'fileReplicas':
         prim_key = 'file'
         tags = 'block.name'
     elif api == 'fileReplicas4dataset':
         prim_key = 'file'
         tags = 'block.name'
     elif api == 'fileReplicas4file':
         prim_key = 'file'
         tags = 'block.name'
     elif api == 'dataset4site':
         prim_key = 'block'
         tags = 'block'
     elif api == 'dataset4se':
         prim_key = 'block'
         tags = 'block'
     elif api == 'dataset4site_group':
         prim_key = 'block'
         tags = 'block'
     elif api == 'dataset4se_group':
         prim_key = 'block'
         tags = 'block'
     elif api == 'site4dataset':
         prim_key = 'block'
         tags = 'block.replica.node'
     elif api == 'site4block':
         prim_key = 'block'
         tags = 'block.replica.node'
     elif api == 'site4file':
         prim_key = 'block'
         tags = 'block.replica.node'
     elif api == 'nodes':
         prim_key = 'node'
     elif api == 'nodeusage':
         prim_key = 'node'
     elif api == 'groups':
         prim_key = 'group'
     elif api == 'groupusage':
         prim_key = 'node'
     elif api == 'lfn2pfn':
         prim_key = 'mapping'
     elif api == 'tfc':
         prim_key = 'storage-mapping'
     else:
         msg = 'PhedexService::parser, unsupported %s API %s' \
             % (self.name, api)
         raise Exception(msg)
     gen = xml_parser(source, prim_key, tags)
     site_names = []
     seen = set()
     tot_files  = 0
     site_info_dict = {}
     for row in gen:
         if  api == 'nodeusage':
              if  'node' in row and 'name' in row['node']:
                 row['name'] = row['node']['name']
          if  'block' in row and 'name' in row['block']:
              if  'dataset' not in row['block']:
                 dataset = row['block']['name'].split('#')[0]
                 row['block']['dataset'] = dataset
         if  api == 'site4dataset' or api == 'site4block':
             item = row['block']['replica']
             if  isinstance(item, list):
                 for replica in item:
                     result = get_replica_info(replica)
                     site_info(site_info_dict, row['block'], replica)
                     if  not replica['files']:
                         continue
                     if  result not in site_names:
                         site_names.append(result)
             elif isinstance(item, dict):
                 replica = item
                 result = get_replica_info(replica)
                 site_info(site_info_dict, row['block'], replica)
                 if  not replica['files']:
                     continue
                 result = get_replica_info(replica)
                 if  result not in site_names:
                     site_names.append(result)
         elif api == 'site4file':
             item = row['block']['file']['replica']
             if  isinstance(item, list):
                 for replica in item:
                     result = get_replica_info(replica)
                     if  result not in site_names:
                         site_names.append(result)
             elif isinstance(item, dict):
                 replica = item
                 result = get_replica_info(replica)
                 if  result not in site_names:
                     site_names.append(result)
         elif  api == 'dataset4site' or api == 'dataset4se' or \
             api == 'dataset4site_group' or api == 'dataset4se_group':
              if  'block' in row:
                 dataset = row['block']['name'].split('#')[0]
                 seen.add(dataset)
         elif  api == 'fileReplicas' or api == 'fileReplicas4file' or \
             api == 'fileReplicas4dataset':
             try:
                  if  'file' in row and isinstance(row['file'], dict):
                     rec = row['file']
                     cksum = rec['checksum']
                     if  cksum.find(',') != -1:
                         adler, cksum = cksum.split(',')
                         rec['adler32'] = adler.replace('adler32:', '')
                         rec['checksum'] = int(cksum.replace('cksum:', ''))
             except:
                 pass
             yield row
         else:
             yield row
     if  api == 'site4dataset' or api == 'site4block':
         for row in site_names:
             name = row['name']
              if  name in site_info_dict:
                 sdict      = site_info_dict[name]
                 sfiles     = float(sdict['files'])
                 tot_files  = float(sdict['totfiles'])
                 file_occ   = '%5.2f%%' % (100*sfiles/tot_files)
             else:
                  file_occ   = '0%'
             row['replica_fraction'] = file_occ.strip()
             yield row
     if  api == 'site4file':
         for row in site_names:
             yield row
     del site_names
     del site_info_dict
     if  seen:
         for dataset in seen:
             yield {'dataset':dict(name=dataset)}
     del seen
Example #10
 def helper(self, api, args, expire):
     """
      Class helper function which yields results for a given set of
      input parameters. It yields data records which must contain a
      combined attribute corresponding to the systems used to produce
      the record content.
     """
     dbs_url = self.map[api]['services'][self.dbs]
     phedex_url = self.map[api]['services']['phedex']
     # make phedex_api from url, but use xml version for processing
     phedex_api = phedex_url.replace('/json/', '/xml/') + '/blockReplicas'
     if  api == 'dataset4site_release' or \
         api == 'dataset4site_release_parent' or \
         api == 'child4site_release_dataset':
         # DBS part
         datasets = set()
         release = args['release']
         parent = args.get('parent', None)
         for row in dbs_dataset4release_parent(dbs_url, release, parent):
             datasets.add(row)
         # Phedex part
         if  args['site'].find('.') != -1: # it is SE
             phedex_args = {'dataset':list(datasets),
                             'se': '%s' % args['site']}
         else:
             phedex_args = {'dataset':list(datasets),
                             'node': '%s*' % args['site']}
         headers = {'Accept': 'text/xml'}
         source, expire = \
             getdata(phedex_api, phedex_args, headers, expire, system='phedex')
         prim_key = 'block'
         tags = 'block.replica.node'
         found = {}
         for rec in xml_parser(source, prim_key, tags):
             ddict = DotDict(rec)
             block = ddict.get('block.name')
             bbytes = ddict.get('block.bytes')
             files = ddict.get('block.files')
             found_dataset = block.split('#')[0]
             if  found_dataset in found:
                 val = found[found_dataset]
                 found[found_dataset] = {'bytes': val['bytes'] + bbytes,
                     'files': val['files'] + files}
             else:
                 found[found_dataset] = {'bytes': bbytes, 'files': files}
         for name, val in found.items():
             record = dict(name=name, size=val['bytes'], files=val['files'])
             if  api == 'child4site_release_dataset':
                 yield {'child': record}
             else:
                 yield {'dataset':record}
         del datasets
         del found
     if  api == 'site4block':
         pass
     if  api == 'site4dataset':
         try:
             gen = site4dataset(dbs_url, phedex_api, args, expire)
             for row in gen:
                 sname = row.get('site', {}).get('name', '')
                 skind = self.site_info(phedex_url, sname)
                 row['site'].update({'kind':skind})
                 yield row
         except Exception as err:
             print_exc(err)
             tstamp = dastimestamp('')
             msg  = tstamp + ' Exception while processing DBS/Phedex info:'
             msg += str(err)
             row = {'site':{'name':'Fail to look-up site info',
                 'error':msg, 'dataset_fraction': 'N/A',
                 'block_fraction':'N/A', 'block_completion':'N/A'},
                 'error': msg}
             yield row
     if  api == 'files4dataset_runs_site' or \
         api == 'files4block_runs_site':
         run_value = args.get('run', [])
         if  isinstance(run_value, dict) and '$in' in run_value:
             runs = run_value['$in']
         elif isinstance(run_value, list):
             runs = run_value
         else:
             if  int_number_pattern.match(str(run_value)):
                 runs = [run_value]
             else:
                 runs = []
         args.update({'runs': runs})
         files = dbs_find('file', dbs_url, args)
         site  = args.get('site')
         phedex_api = phedex_url.replace('/json/', '/xml/') + '/fileReplicas'
         for fname in files4site(phedex_api, files, site):
             yield {'file':{'name':fname}}
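
The run-argument handling above accepts three shapes: a MongoDB-style {'$in': [...]} dict, a plain list, or a single value. The same normalization as a standalone sketch; int_number_pattern below is a stand-in regex, since the real DAS pattern is not shown in this excerpt:

    import re

    # stand-in for DAS's int_number_pattern (assumed to match plain integers)
    int_number_pattern = re.compile(r'^\d+$')

    def normalize_runs(run_value):
        "Normalize a DAS run argument into a list of runs"
        if isinstance(run_value, dict) and '$in' in run_value:
            return run_value['$in']
        if isinstance(run_value, list):
            return run_value
        if int_number_pattern.match(str(run_value)):
            return [run_value]
        return []

    print(normalize_runs({'$in': [160915, 160916]}))  # [160915, 160916]
    print(normalize_runs(160915))                     # [160915]
    print(normalize_runs('bad'))                      # []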
Example #11
 def parser(self, query, dformat, source, api):
     """
     Phedex data-service parser.
     """
     tags = []
     if api == 'blockReplicas':
         prim_key = 'block'
     elif api == 'fileReplicas':
         prim_key = 'file'
         tags = 'block.name'
     elif api == 'fileReplicas4dataset':
         prim_key = 'file'
         tags = 'block.name'
     elif api == 'fileReplicas4file':
         prim_key = 'file'
         tags = 'block.name'
     elif api == 'dataset4site':
         prim_key = 'block'
         tags = 'block'
     elif api == 'dataset4se':
         prim_key = 'block'
         tags = 'block'
     elif api == 'dataset4site_group':
         prim_key = 'block'
         tags = 'block'
     elif api == 'dataset4se_group':
         prim_key = 'block'
         tags = 'block'
     elif api == 'site4dataset':
         prim_key = 'block'
         tags = 'block.replica.node'
     elif api == 'site4block':
         prim_key = 'block'
         tags = 'block.replica.node'
     elif api == 'site4file':
         prim_key = 'block'
         tags = 'block.replica.node'
     elif api == 'nodes':
         prim_key = 'node'
     elif api == 'nodeusage':
         prim_key = 'node'
     elif api == 'groups':
         prim_key = 'group'
     elif api == 'groupusage':
         prim_key = 'node'
     elif api == 'lfn2pfn':
         prim_key = 'mapping'
     elif api == 'tfc':
         prim_key = 'storage-mapping'
     else:
         msg = 'PhedexService::parser, unsupported %s API %s' \
             % (self.name, api)
         raise Exception(msg)
     gen = xml_parser(source, prim_key, tags)
     site_names = []
     seen = set()
     tot_files = 0
     site_info_dict = {}
     for row in gen:
         if api == 'nodeusage':
             if 'node' in row and 'name' in row['node']:
                 row['name'] = row['node']['name']
         if 'block' in row and 'name' in row['block']:
             if 'dataset' not in row['block']:
                 dataset = row['block']['name'].split('#')[0]
                 row['block']['dataset'] = dataset
         if api == 'site4dataset' or api == 'site4block':
             item = row['block']['replica']
             if isinstance(item, list):
                 for replica in item:
                     result = get_replica_info(replica)
                     site_info(site_info_dict, row['block'], replica)
                     if not replica['files']:
                         continue
                     if result not in site_names:
                         site_names.append(result)
             elif isinstance(item, dict):
                 replica = item
                 result = get_replica_info(replica)
                 site_info(site_info_dict, row['block'], replica)
                 if not replica['files']:
                     continue
                 result = get_replica_info(replica)
                 if result not in site_names:
                     site_names.append(result)
         elif api == 'site4file':
             item = row['block']['file']['replica']
             if isinstance(item, list):
                 for replica in item:
                     result = get_replica_info(replica)
                     if result not in site_names:
                         site_names.append(result)
             elif isinstance(item, dict):
                 replica = item
                 result = get_replica_info(replica)
                 if result not in site_names:
                     site_names.append(result)
         elif  api == 'dataset4site' or api == 'dataset4se' or \
             api == 'dataset4site_group' or api == 'dataset4se_group':
             if 'block' in row:
                 dataset = row['block']['name'].split('#')[0]
                 seen.add(dataset)
         elif  api == 'fileReplicas' or api == 'fileReplicas4file' or \
             api == 'fileReplicas4dataset':
             try:
                 if 'file' in row and isinstance(row['file'], dict):
                     rec = row['file']
                     cksum = rec['checksum']
                     for item in cksum.split(','):
                         key, val = item.split(':')
                         if key == 'cksum':
                             rec['checksum'] = int(val)
                         else:
                             rec[key] = val
             except:
                 pass
             yield row
         else:
             yield row
     if api == 'site4dataset' or api == 'site4block':
         for row in site_names:
             name = row['name']
             if name in site_info_dict:
                 sdict = site_info_dict[name]
                 sfiles = float(sdict['files'])
                 tot_files = float(sdict['totfiles'])
                 file_occ = '%5.2f%%' % (100 * sfiles / tot_files)
             else:
                  file_occ = '0%'
             row['replica_fraction'] = file_occ.strip()
             yield row
     if api == 'site4file':
         for row in site_names:
             yield row
     del site_names
     del site_info_dict
     if seen:
         for dataset in seen:
             yield {'dataset': dict(name=dataset)}
     del seen
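
The checksum branch above splits a comma-separated 'key:value' string; the 'adler32:<hex>,cksum:<int>' layout is inferred from this code and from the older variant in Example #9, which special-cases the adler32 prefix. The same logic as a standalone helper:

    def split_checksums(cksum):
        "Split a Phedex checksum string into typed record fields"
        rec = {}
        for item in cksum.split(','):
            key, val = item.split(':')
            if key == 'cksum':
                rec['checksum'] = int(val)
            else:
                rec[key] = val
        return rec

    print(split_checksums('adler32:9f0ebb01,cksum:2662204156'))
    # {'adler32': '9f0ebb01', 'checksum': 2662204156}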
Example #12
 def helper(self, api, args, expire):
     """
      Class helper function which yields results for a given set of
      input parameters. It yields data records which must contain a
      combined attribute corresponding to the systems used to produce
      the record content.
     """
     dbs_url = self.map[api]['services'][self.dbs]
     phedex_url = self.map[api]['services']['phedex']
     # make phedex_api from url, but use xml version for processing
     phedex_api = phedex_url.replace('/json/', '/xml/') + '/blockReplicas'
     if  api == 'dataset4site_release' or \
         api == 'dataset4site_release_parent' or \
         api == 'child4site_release_dataset':
         # DBS part
         datasets = set()
         release = args['release']
         parent = args.get('parent', None)
         for row in dbs_dataset4release_parent(dbs_url, release, parent):
             datasets.add(row)
         # Phedex part
         if args['site'].find('.') != -1:  # it is SE
             phedex_args = {
                 'dataset': list(datasets),
                 'se': '%s' % args['site']
             }
         else:
             phedex_args = {
                 'dataset': list(datasets),
                 'node': '%s*' % args['site']
             }
         headers = {'Accept': 'text/xml'}
         source, expire = \
             getdata(phedex_api, phedex_args, headers, expire, system='phedex')
         prim_key = 'block'
         tags = 'block.replica.node'
         found = {}
         for rec in xml_parser(source, prim_key, tags):
             ddict = DotDict(rec)
             block = ddict.get('block.name')
             bbytes = ddict.get('block.bytes')
             files = ddict.get('block.files')
             found_dataset = block.split('#')[0]
             if found_dataset in found:
                 val = found[found_dataset]
                 found[found_dataset] = {
                     'bytes': val['bytes'] + bbytes,
                     'files': val['files'] + files
                 }
             else:
                 found[found_dataset] = {'bytes': bbytes, 'files': files}
         for name, val in found.items():
             record = dict(name=name, size=val['bytes'], files=val['files'])
             if api == 'child4site_release_dataset':
                 yield {'child': record}
             else:
                 yield {'dataset': record}
         del datasets
         del found
     if api == 'site4dataset':
         try:
             gen = site4dataset(dbs_url, phedex_api, args, expire)
             for row in gen:
                 sname = row.get('site', {}).get('name', '')
                 skind = self.site_info(phedex_url, sname)
                 row['site'].update({'kind': skind})
                 yield row
         except Exception as err:
             print_exc(err)
             tstamp = dastimestamp('')
             msg = tstamp + ' Exception while processing DBS/Phedex info:'
             msg += str(err)
             row = {
                 'site': {
                     'name': 'Fail to look-up site info',
                     'error': msg,
                     'dataset_fraction': 'N/A',
                     'block_fraction': 'N/A',
                     'block_completion': 'N/A'
                 },
                 'error': msg
             }
             yield row
     if  api == 'files4dataset_runs_site' or \
         api == 'files4block_runs_site':
         run_value = args.get('run', [])
         if isinstance(run_value, dict) and '$in' in run_value:
             runs = run_value['$in']
         elif isinstance(run_value, list):
             runs = run_value
         else:
             if int_number_pattern.match(str(run_value)):
                 runs = [run_value]
             else:
                 runs = []
         args.update({'runs': runs})
         files = dbs_find('file', dbs_url, args)
         site = args.get('site')
         phedex_api = phedex_url.replace('/json/',
                                         '/xml/') + '/fileReplicas'
         for fname in files4site(phedex_api, files, site):
             yield {'file': {'name': fname}}
Example #13
def site4dataset(dbs_url, phedex_api, args, expire):
    "Yield site information about given dataset"
    # DBS part
    dataset = args['dataset']
    try:
        totblocks, totfiles = dataset_summary(dbs_url, dataset)
    except Exception as err:
        error = 'combined service unable to process your request'
        reason = "Fail to parse #block, #files info, %s" % str(err)
        yield {
            'site': {
                'name': 'N/A',
                'se': 'N/A',
                'error': error,
                'reason': reason
            }
        }
        return
    # Phedex part
    phedex_args = {'dataset': args['dataset']}
    headers = {'Accept': 'text/xml'}
    source, expire = \
        getdata(phedex_api, phedex_args, headers, expire, system='phedex')
    prim_key = 'block'
    tags = 'block.replica.node'
    site_info = {}
    for rec in xml_parser(source, prim_key, tags):
        ddict = DotDict(rec)
        replicas = ddict.get('block.replica')
        if not isinstance(replicas, list):
            replicas = [replicas]
        for row in replicas:
            if not row or 'node' not in row:
                continue
            node = row['node']
            files = int(row['files'])
            complete = 1 if row['complete'] == 'y' else 0
            if node in site_info:
                files = site_info[node]['files'] + files
                nblks = site_info[node]['blocks'] + 1
                bc_val = site_info[node]['blocks_complete']
                b_complete = bc_val + 1 if complete else bc_val
            else:
                b_complete = 1 if complete else 0
                nblks = 1
            site_info[node] = {
                'files': files,
                'blocks': nblks,
                'blocks_complete': b_complete
            }
    row = {}
    for key, val in site_info.items():
        if totfiles:
            nfiles = '%5.2f%%' % (100 * float(val['files']) / totfiles)
        else:
            nfiles = 'N/A'
        if totblocks:
            nblks = '%5.2f%%' % (100 * float(val['blocks']) / totblocks)
        else:
            nblks = 'N/A'
        ratio = float(val['blocks_complete']) / val['blocks']
        b_completion = '%5.2f%%' % (100 * ratio)
        row = {
            'site': {
                'name': key,
                'dataset_fraction': nfiles,
                'block_fraction': nblks,
                'block_completion': b_completion
            }
        }
        yield row
Example #14
 def parser(self, dasquery, dformat, source, api):
     """
     DBS data-service parser.
     """
     sitedb = SERVICES.get('sitedb2', None) # look-up SiteDB from global scope
     query = dasquery.mongo_query
     if  api == 'listBlocks':
         prim_key = 'block'
     elif api == 'listBlocks4path':
         api = 'listBlocks'
         prim_key = 'block'
     elif api == 'listBlockProvenance':
         prim_key = 'block'
     elif api == 'listBlockProvenance4child':
         prim_key = 'block'
     elif api == 'listFiles':
         prim_key = 'file'
     elif api == 'listLFNs':
         prim_key = 'file_lfn'
     elif api == 'listFileLumis':
         prim_key = 'file_lumi_section'
     elif api == 'listFileProcQuality':
         prim_key = 'file_proc_quality'
     elif api == 'listFileParents':
         prim_key = 'file_parent'
     elif api == 'listTiers':
         prim_key = 'data_tier'
     elif api == 'listDatasetParents':
         prim_key = 'processed_dataset_parent'
     elif api == 'listPrimaryDatasets':
         prim_key = 'primary_dataset'
     elif api == 'listProcessedDatasets':
         prim_key = 'processed_dataset'
     elif api == 'fakeReleases':
         prim_key = 'release'
     elif api == 'listRuns':
         prim_key = 'run'
     elif  api == 'fakeRelease4File':
         prim_key = 'release'
     elif  api == 'fakeRelease4Dataset':
         prim_key = 'release'
     elif  api == 'fakeGroup4Dataset':
         prim_key = 'group'
     elif  api == 'fakeConfig':
         prim_key = 'config'
     elif  api == 'fakeListDataset4Block':
         prim_key = 'dataset'
     elif  api == 'fakeListDataset4File':
         prim_key = 'dataset'
     elif  api == 'fakeListDatasetbyDate':
         prim_key = 'dataset'
     elif  api == 'fakeDatasetSummary':
         prim_key = 'dataset'
     elif  api == 'fakeDataset4Run':
         prim_key = 'dataset'
     elif  api == 'fakeRun4File':
         prim_key = 'run'
     elif  api == 'fakeRun4Run':
         prim_key = 'run'
     elif api == 'fakeChild4File':
         prim_key = 'child'
     elif api == 'fakeChild4Dataset':
         prim_key = 'child'
     elif api == 'fakeSite4Dataset':
         prim_key = 'site'
     elif api == 'fakeStatus':
         prim_key = 'status'
     elif api == 'fakeFiles4DatasetRunLumis':
         prim_key = 'file'
     elif api == 'fakeRun4Block':
         prim_key = 'run'
     elif api == 'fakeBlock4DatasetRun':
         prim_key = 'block'
     else:
         msg = 'DBSService::parser, unsupported %s API %s' \
             % (self.name, api)
         raise Exception(msg)
     if  api.find('fake') != -1:
         gen = qlxml_parser(source, prim_key)
     else:
         gen = xml_parser(source, prim_key)
     useless_run_atts = ['number_of_events', 'number_of_lumi_sections', \
             'id', 'total_luminosity', 'store_number', 'end_of_run', \
             'start_of_run']
     config_attrs = ['config.name', 'config.content', 'config.version', \
              'config.type', 'config.annotation', 'config.createdate', \
              'config.createby', 'config.moddate', 'config.modby']
     for row in gen:
         if  not row:
             continue
          if  'status' in row and 'dataset.status' in row['status']:
             row['status']['name'] = row['status']['dataset.status']
             del row['status']['dataset.status']
          if  'file_lumi_section' in row:
             row['lumi'] = row['file_lumi_section']
             del row['file_lumi_section']
          if  'algorithm' in row:
             del row['algorithm']['ps_content']
          if  'processed_dataset' in row and \
              'path' in row['processed_dataset']:
              if  isinstance(row['processed_dataset']['path'], dict) \
              and 'dataset_path' in row['processed_dataset']['path']:
                 path = row['processed_dataset']['path']['dataset_path']
                 del row['processed_dataset']['path']
                 row['processed_dataset']['name'] = path
         # case for fake apis
         # remove useless attribute from results
          if  'dataset' in row:
              if  'count_file.size' in row['dataset']:
                  del row['dataset']['count_file.size']
              if  'dataset' in row['dataset']:
                 name = row['dataset']['dataset']
                 del row['dataset']['dataset']
                 row['dataset']['name'] = name
          if  'child' in row and 'dataset.child' in row['child']:
             row['child']['name'] = row['child']['dataset.child']
             del row['child']['dataset.child']
          if  'child' in row and 'file.child' in row['child']:
             row['child']['name'] = row['child']['file.child']
             del row['child']['file.child']
          if  'block' in row and query.get('fields') == ['parent']:
             row['parent'] = row['block']
             del row['block']
          if  'block' in row and query.get('fields') == ['child']:
             row['child'] = row['block']
             del row['block']
          if  'run' in row and 'run' in row['run']:
             row['run']['run_number'] = row['run']['run']
             del row['run']['run']
          if  'release' in row and 'release' in row['release']:
             row['release']['name'] = row['release']['release']
             del row['release']['release']
          if  'site' in row:
             row['site']['se'] = row['site']['site']
             del row['site']['site']
         convert_dot(row, 'config', config_attrs)
         convert_dot(row, 'file', ['file.name'])
         convert_dot(row, 'block', ['block.name'])
         convert_dot(row, 'dataset', ['dataset.tag', 'dataset.status'])
         # remove DBS2 run attributes (to be consistent with DBS3 output)
         # and let people extract this info from CondDB/LumiDB.
          if  'run' in row:
             for att in useless_run_atts:
                 try:
                     del row['run'][att]
                 except:
                     pass
         if  api == 'fakeSite4Dataset' and sitedb:
             site = row.get('site', None)
             if  site and isinstance(site, dict):
                 sename = site.get('se', None)
                 info = sitedb.site_info(sename)
                 if  info:
                     row['site'].update(info)
         yield row
Example #15
 def helper(self, url, api, args, expire):
     """
      Class helper function which yields results for a given set of
      input parameters. It yields data records which must contain a
      combined attribute corresponding to the systems used to produce
      the record content.
     """
     dbs_url = url['dbs']
     phedex_url = url['phedex']
     if  api == 'combined_dataset4site_release':
         # DBS part
         datasets = set()
         for row in dbs_dataset4site_release(dbs_url, self.getdata, args['release']):
             datasets.add(row)
         # Phedex part
         if  args['site'].find('.') != -1: # it is SE
             phedex_args = {'dataset':list(datasets), 
                             'se': '%s' % args['site']}
         else:
             phedex_args = {'dataset':list(datasets), 
                             'node': '%s*' % args['site']}
         headers = {'Accept': 'text/xml'}
         source, expire = \
         self.getdata(phedex_url, phedex_args, expire, headers, post=True)
         prim_key = 'block'
         tags = 'block.replica.node'
         found = {}
         for rec in xml_parser(source, prim_key, tags):
             ddict = DotDict(rec)
             block = ddict.get('block.name')
             bbytes = ddict.get('block.bytes')
             files = ddict.get('block.files')
             found_dataset = block.split('#')[0]
              if  found_dataset in found:
                 val = found[found_dataset]
                 found[found_dataset] = {'bytes': val['bytes'] + bbytes,
                     'files': val['files'] + files}
             else:
                 found[found_dataset] = {'bytes': bbytes, 'files': files}
          for name, val in found.items():
             record = dict(name=name, size=val['bytes'], files=val['files'],
                             combined=['dbs', 'phedex']) 
             yield {'dataset':record}
         del datasets
         del found
     if  api == 'combined_site4dataset':
         # DBS part
         dataset = args['dataset']
         totblocks, totfiles = \
             dataset_summary(dbs_url, self.getdata, dataset)
         # Phedex part
         phedex_args = {'dataset':args['dataset']}
         headers = {'Accept': 'text/xml'}
         source, expire = \
         self.getdata(phedex_url, phedex_args, expire, headers, post=True)
         prim_key = 'block'
         tags = 'block.replica.node'
         found = {}
         site_info = {}
         for rec in xml_parser(source, prim_key, tags):
             ddict = DotDict(rec)
             replicas = ddict.get('block.replica')
             if  not isinstance(replicas, list):
                 replicas = [replicas]
             for row in replicas:
                  if  not row or 'node' not in row:
                     continue
                 node = row['node']
                 files = int(row['files'])
                 complete = 1 if row['complete'] == 'y' else 0
                  if  node in site_info:
                     files = site_info[node]['files'] + files
                     nblks  = site_info[node]['blocks'] + 1
                     bc_val = site_info[node]['blocks_complete']
                     b_complete = bc_val+1 if complete else bc_val
                 else:
                     b_complete = 1 if complete else 0
                     nblks = 1
                 site_info[node] = {'files': files, 'blocks': nblks,
                             'blocks_complete': b_complete}
         row = {}
          for key, val in site_info.items():
             if  totfiles:
                 nfiles = '%5.2f%%' % (100*float(val['files'])/totfiles)
             else:
                 nfiles = 'N/A'
             if  totblocks:
                 nblks  = '%5.2f%%' % (100*float(val['blocks'])/totblocks)
             else:
                 nblks = 'N/A'
             ratio = float(val['blocks_complete'])/val['blocks']
             b_completion = '%5.2f%%' % (100*ratio)
             row = {'site':{'name':key, 'dataset_fraction': nfiles,
                 'block_fraction': nblks, 'block_completion': b_completion}}
             yield row
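
The site fractions yielded above all use the same '%5.2f%%' pattern. A short worked example with illustrative numbers:

    val = {'files': 7, 'blocks': 3, 'blocks_complete': 2}
    totfiles, totblocks = 10, 4
    print('%5.2f%%' % (100 * float(val['files']) / totfiles))    # '70.00%'
    print('%5.2f%%' % (100 * float(val['blocks']) / totblocks))  # '75.00%'
    ratio = float(val['blocks_complete']) / val['blocks']
    print('%5.2f%%' % (100 * ratio))                             # '66.67%'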