Example #1
def get_file_run_lumis(url, api, args, verbose=0):
    "Helper function to deal with file,run,lumi requests"
    run_value = args.get("run_num", [])
    if isinstance(run_value, dict) and "$in" in run_value:
        runs = run_value["$in"]
    elif isinstance(run_value, list):
        runs = run_value
    else:
        if int_number_pattern.match(str(run_value)):
            runs = [run_value]
        elif run_value[0] == "[" and run_value[-1] == "]":
            runs = json.loads(run_value)
        else:
            runs = []
    args.update({"runs": runs})
    blk = args.get("block_name", None)
    if blk:  # we don't need to look-up blocks
        blocks = [blk]
    else:
        blocks = dbs_find("block", url, args, verbose)
    if not blocks:
        return
    gen = file_run_lumis(url, blocks, runs, verbose)
    key = "file_run"
    if api.startswith("run_lumi"):
        key = "run"
    if api.startswith("file_lumi"):
        key = "file"
    if api.startswith("file_run_lumi"):
        key = "file_run"
    for row in process_lumis_with(key, gen):
        yield row
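
All of these examples rely on module-level regular expressions (int_number_pattern, float_number_pattern, date_yyyymmdd_pattern) defined elsewhere in the DAS codebase. For reference, here is a minimal sketch of plausible definitions; these are assumptions inferred from how the patterns are used, not the exact DAS source:

import re

# Hypothetical pattern definitions; the real DAS module may differ.
int_number_pattern = re.compile(r'^[-+]?\d+$')                 # e.g. "123"
float_number_pattern = re.compile(r'^[-+]?(\d+\.\d*|\.\d+)$')  # e.g. "1.5"
date_yyyymmdd_pattern = re.compile(r'^[12]\d{3}(0[1-9]|1[0-2])(0[1-9]|[12]\d|3[01])$')  # e.g. "20120101"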
Example #2
def adjust_value(val):
    "Adjust value to DAS patterns"
    if date_yyyymmdd_pattern.match(val):
        return das_dateformat(val)
    elif int_number_pattern.match(val):
        return int(val)
    return val
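
A quick usage sketch of adjust_value (assuming the pattern definitions above, and that das_dateformat converts a 'YYYYMMDD' string into the DAS-internal date representation):

adjust_value('20120101')  # matches date_yyyymmdd_pattern -> das_dateformat('20120101')
adjust_value('42')        # matches int_number_pattern -> 42 (an int)
adjust_value('/a/b/c')    # matches neither pattern -> returned unchanged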
Example #3
def url_args(url, convert_types=False):
    """
    Extract args from given url, e.g. http://a.b.com/api?arg1=1&arg2=2
    will yield {'arg1':1, 'arg2':2}
    """
    args = {}
    for item in url.split("?")[-1].split('&'):
        key, value = item.split('=')
        if convert_types:
            if int_number_pattern.match(value):
                args[key] = int(value)
            else:
                args[key] = value
        else:
            args[key] = value
    return args
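
Usage follows directly from the docstring; note that type conversion only happens when convert_types is set, and that the naive split('=') does not support values that themselves contain '=':

url_args('http://a.b.com/api?arg1=1&arg2=2')
# {'arg1': '1', 'arg2': '2'}
url_args('http://a.b.com/api?arg1=1&arg2=2', convert_types=True)
# {'arg1': 1, 'arg2': 2}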
Example #4
def size_format(uinput):
    """
    Format file size utility, it converts file size into KB, MB, GB, TB, PB units
    """
    if not (float_number_pattern.match(str(uinput)) or
            int_number_pattern.match(str(uinput))):
        return 'N/A'
    try:
        num = float(uinput)
    except Exception as exc:
        print_exc(exc)
        return "N/A"
    base = 1000. # power of 10, or use 1024. for power of 2
    for xxx in ['', 'KB', 'MB', 'GB', 'TB', 'PB']:
        if num < base:
            return "%3.1f%s" % (num, xxx)
        num /= base
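
Sample conversions (the base is 1000, so units are decimal; non-numeric input yields 'N/A', and values of 1000 PB or more fall off the end of the loop and return None):

size_format(123)      # '123.0'
size_format(1234567)  # '1.2MB'
size_format('abc')    # 'N/A'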
Example #5
def get_block_run_lumis(url, api, args, verbose=0):
    "Helper function to deal with block,run,lumi requests"
    run_value = args.get('run_num', [])
    if isinstance(run_value, dict) and '$in' in run_value:
        runs = run_value['$in']
    elif isinstance(run_value, list):
        runs = run_value
    else:
        if int_number_pattern.match(str(run_value)):
            runs = [run_value]
        else:
            runs = []
    args.update({'runs': runs})
    blocks = dbs_find('block', url, args, verbose)
    gen = block_run_lumis(url, blocks, runs, verbose)
    key = 'block_run'
    for row in process_lumis_with(key, gen):
        yield row
Example #6
def get_file_run_lumis(url, api, args, verbose=0):
    "Helper function to deal with file,run,lumi requests"
    run_value = args.get('run_num', [])
    if isinstance(run_value, dict) and '$in' in run_value:
        runs = run_value['$in']
    elif isinstance(run_value, list):
        runs = run_value
    else:
        if int_number_pattern.match(str(run_value)):
            runs = [run_value]
        elif run_value[0] == '[' and run_value[-1] == ']':
            if '-' in run_value: # continuous range, e.g. "[1-10]"
                runs = run_value.replace("'", '').replace('[', '').replace(']', '')
            else:
                runs = json.loads(run_value)
        else:
            runs = run_value
    args.update({'runs': runs})
    blk = args.get('block_name', None)
    if blk: # we don't need to look-up blocks
        blocks = [blk]
    else:
        blocks = dbs_find('block', url, args, verbose)
    if not blocks:
        return
    valid = 1 if args.get('validFileOnly', '') else 0
    gen = file_run_lumis(url, blocks, runs, valid, verbose)
    key = 'file_run'
    if api.startswith('run_lumi'):
        key = 'run'
    if api.startswith('file_lumi'):
        key = 'file'
    if api.startswith('file_run'):
        # also covers the file_run_lumi and file_run_lumi_events APIs
        key = 'file_run'
    for row in process_lumis_with(key, gen):
        yield row
Example #7
def get_file_run_lumis(url, api, args):
    "Helper function to deal with file,run,lumi requests"
    run_value = args.get('run', [])
    if isinstance(run_value, dict) and '$in' in run_value:
        runs = run_value['$in']
    elif isinstance(run_value, list):
        runs = run_value
    else:
        if int_number_pattern.match(str(run_value)):
            runs = [run_value]
        else:
            runs = []
    args.update({'runs': runs})
    blocks = dbs_find('block', url, args)
    gen = file_run_lumis(url, blocks, runs)
    key = 'file_run' # default, so key is always defined below
    if api.startswith('run_lumi'):
        key = 'run'
    if api.startswith('file_lumi'):
        key = 'file'
    if api.startswith('file_run_lumi'):
        key = 'file_run'
    for row in process_lumis_with(key, gen):
        yield row
Example #8
def lumi_evts(rdict):
    "Helper function to show lumi-events pairs suitable for web UI"
    run = rdict['Run number']
    lumis = rdict['Luminosity number']
    events = rdict['Events']
    pdict = dict(zip(lumis, events))
    pkeys = [str(k) for k in pdict.keys()]
    tag = 'id_%s_%s' % (run, ''.join(pkeys))
    link = 'link_%s_%s' % (run, ''.join(pkeys))
    hout = '<div class="hide" id="%s" name="%s">' % (tag, tag)
    tot_evts = 0
    for idx, lumi in enumerate(sorted(pdict.keys())):
        evts = pdict[lumi]
        if evts != 'NA' and evts and int_number_pattern.match(str(evts)):
            tot_evts += int(evts)
        hout += 'Lumi: %s, Events %s<br/>' % (lumi, evts)
    hout += "</div>"
    out = """&nbsp;<em>lumis/events pairs</em>\
    <a href="javascript:ToggleTag('%s', '%s')" id="%s">show</a>""" \
            % (tag, link, link)
    if tot_evts:
        out += '&nbsp; Total events=%s' % tot_evts
    out += hout
    return out
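
The returned markup hides the per-lumi event counts in a <div> that the ToggleTag JavaScript link reveals; the running total only includes entries that look like integers, so 'NA' placeholders are skipped.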
Example #9
 def helper(self, api, args, expire):
     """
     Class helper function which yields results for given
     set of input parameters. It yeilds the data record which
     must contain combined attribute corresponding to systems
     used to produce record content.
     """
     dbs_url = self.map[api]['services'][self.dbs]
     phedex_url = self.map[api]['services']['phedex']
     # make phedex_api from url, but use xml version for processing
     phedex_api = phedex_url.replace('/json/', '/xml/') + '/blockReplicas'
     if api == 'dataset4site_release' or \
        api == 'dataset4site_release_parent' or \
        api == 'child4site_release_dataset':
         # DBS part
         datasets = set()
         release = args['release']
         parent = args.get('parent', None)
         for row in dbs_dataset4release_parent(dbs_url, release, parent):
             datasets.add(row)
         # Phedex part
         if args['site'].find('.') != -1: # it is SE
             phedex_args = {'dataset':list(datasets),
                             'se': '%s' % args['site']}
         else:
             phedex_args = {'dataset':list(datasets),
                             'node': '%s*' % args['site']}
         headers = {'Accept': 'text/xml'}
         source, expire = \
             getdata(phedex_api, phedex_args, headers, expire, system='phedex')
         prim_key = 'block'
         tags = 'block.replica.node'
         found = {}
         for rec in xml_parser(source, prim_key, tags):
             ddict = DotDict(rec)
             block = ddict.get('block.name')
             bbytes = ddict.get('block.bytes')
             files = ddict.get('block.files')
             found_dataset = block.split('#')[0]
             if found_dataset in found:
                 val = found[found_dataset]
                 found[found_dataset] = {'bytes': val['bytes'] + bbytes,
                     'files': val['files'] + files}
             else:
                 found[found_dataset] = {'bytes': bbytes, 'files': files}
         for name, val in found.items():
             record = dict(name=name, size=val['bytes'], files=val['files'])
             if api == 'child4site_release_dataset':
                 yield {'child': record}
             else:
                 yield {'dataset':record}
         del datasets
         del found
     if api == 'site4block':
         pass
     if api == 'site4dataset':
         try:
             gen = site4dataset(dbs_url, phedex_api, args, expire)
             for row in gen:
                 sname = row.get('site', {}).get('name', '')
                 skind = self.site_info(phedex_url, sname)
                 row['site'].update({'kind':skind})
                 yield row
         except Exception as err:
             print_exc(err)
             tstamp = dastimestamp('')
             msg = tstamp + ' Exception while processing DBS/Phedex info:'
             msg += str(err)
             row = {'site':{'name':'Fail to look-up site info',
                 'error':msg, 'dataset_fraction': 'N/A',
                 'block_fraction':'N/A', 'block_completion':'N/A'},
                 'error': msg}
             yield row
     if api == 'files4dataset_runs_site' or \
        api == 'files4block_runs_site':
         run_value = args.get('run', [])
         if isinstance(run_value, dict) and '$in' in run_value:
             runs = run_value['$in']
         elif isinstance(run_value, list):
             runs = run_value
         else:
             if int_number_pattern.match(str(run_value)):
                 runs = [run_value]
             else:
                 runs = []
         args.update({'runs': runs})
         files = dbs_find('file', dbs_url, args)
         site = args.get('site')
         phedex_api = phedex_url.replace('/json/', '/xml/') + '/fileReplicas'
         for fname in files4site(phedex_api, files, site):
             yield {'file':{'name':fname}}
Example #10
 def helper(self, api, args, expire):
     """
     Class helper function which yields results for given
     set of input parameters. It yeilds the data record which
     must contain combined attribute corresponding to systems
     used to produce record content.
     """
     dbs_url = self.map[api]['services'][self.dbs]
     phedex_url = self.map[api]['services']['phedex']
     # make phedex_api from url, but use xml version for processing
     phedex_api = phedex_url.replace('/json/', '/xml/') + '/blockReplicas'
     if api == 'dataset4site_release' or \
        api == 'dataset4site_release_parent' or \
        api == 'child4site_release_dataset':
         # DBS part
         datasets = set()
         release = args['release']
         parent = args.get('parent', None)
         for row in dbs_dataset4release_parent(dbs_url, release, parent):
             datasets.add(row)
         # Phedex part
         if args['site'].find('.') != -1:  # it is SE
             phedex_args = {
                 'dataset': list(datasets),
                 'se': '%s' % args['site']
             }
         else:
             phedex_args = {
                 'dataset': list(datasets),
                 'node': '%s*' % args['site']
             }
         headers = {'Accept': 'text/xml'}
         source, expire = \
             getdata(phedex_api, phedex_args, headers, expire, system='phedex')
         prim_key = 'block'
         tags = 'block.replica.node'
         found = {}
         for rec in xml_parser(source, prim_key, tags):
             ddict = DotDict(rec)
             block = ddict.get('block.name')
             bbytes = ddict.get('block.bytes')
             files = ddict.get('block.files')
             found_dataset = block.split('#')[0]
             if found_dataset in found:
                 val = found[found_dataset]
                 found[found_dataset] = {
                     'bytes': val['bytes'] + bbytes,
                     'files': val['files'] + files
                 }
             else:
                 found[found_dataset] = {'bytes': bbytes, 'files': files}
         for name, val in found.items():
             record = dict(name=name, size=val['bytes'], files=val['files'])
             if api == 'child4site_release_dataset':
                 yield {'child': record}
             else:
                 yield {'dataset': record}
         del datasets
         del found
     if api == 'site4dataset':
         try:
             gen = site4dataset(dbs_url, phedex_api, args, expire)
             for row in gen:
                 sname = row.get('site', {}).get('name', '')
                 skind = self.site_info(phedex_url, sname)
                 row['site'].update({'kind': skind})
                 yield row
         except Exception as err:
             print_exc(err)
             tstamp = dastimestamp('')
             msg = tstamp + ' Exception while processing DBS/Phedex info:'
             msg += str(err)
             row = {
                 'site': {
                     'name': 'Fail to look-up site info',
                     'error': msg,
                     'dataset_fraction': 'N/A',
                     'block_fraction': 'N/A',
                     'block_completion': 'N/A'
                 },
                 'error': msg
             }
             yield row
     if api == 'files4dataset_runs_site' or \
        api == 'files4block_runs_site':
         run_value = args.get('run', [])
         if isinstance(run_value, dict) and '$in' in run_value:
             runs = run_value['$in']
         elif isinstance(run_value, list):
             runs = run_value
         else:
             if int_number_pattern.match(str(run_value)):
                 runs = [run_value]
             else:
                 runs = []
         args.update({'runs': runs})
         files = dbs_find('file', dbs_url, args)
         site = args.get('site')
         phedex_api = phedex_url.replace('/json/',
                                         '/xml/') + '/fileReplicas'
         for fname in files4site(phedex_api, files, site):
             yield {'file': {'name': fname}}
Example #11
def parse_filter(spec, flt):
    """
    Parse given filter and return MongoDB key/value dictionary.
    Be smart not to overwrite spec condition of DAS query.
    """
    if flt.find('=') != -1 and flt.find('!=') == -1 and \
       (flt.find('<') == -1 and flt.find('>') == -1):
        key, val = flt.split('=')
        if int_number_pattern.match(str(val)):
            val = int(val)
        elif float_number_pattern.match(str(val)):
            val = float(val)
        elif isinstance(val, str) or isinstance(val, unicode):
            if val.find('*') != -1:
                val = re.compile('%s' % val.replace('*', '.*'))
            val = parse_filter_string(val)
        return {key: val}
    elif flt.find('!=') != -1 and \
       (flt.find('<') == -1 and flt.find('>') == -1):
        key, val = flt.split('!=')
        if int_number_pattern.match(str(val)):
            val = int(val)
        elif float_number_pattern.match(str(val)):
            val = float(val)
        elif isinstance(val, str) or isinstance(val, unicode):
            if val.find('*') != -1:
                val = re.compile('^(?:(?!%s).)*$' % val.replace('*', '.*'))
            else:
                val = re.compile('^(?:(?!%s).)*$' % val)
            val = parse_filter_string(val)
            return {key: val}
        return {key: {'$ne': val}}
    elif flt.find('<=') != -1:
        key, val = flt.split('<=')
        if int_number_pattern.match(str(val)):
            val = int(val)
        if float_number_pattern.match(str(val)):
            val = float(val)
        return {key: {'$lte': val}}
    elif flt.find('<') != -1:
        key, val = flt.split('<')
        if int_number_pattern.match(str(val)):
            val = int(val)
        if float_number_pattern.match(str(val)):
            val = float(val)
        return {key: {'$lt': val}}
    elif flt.find('>=') != -1:
        key, val = flt.split('>=')
        if int_number_pattern.match(str(val)):
            val = int(val)
        if float_number_pattern.match(str(val)):
            val = float(val)
        return {key: {'$gte': val}}
    elif flt.find('>') != -1:
        key, val = flt.split('>')
        if int_number_pattern.match(str(val)):
            val = int(val)
        if float_number_pattern.match(str(val)):
            val = float(val)
        return {key: {'$gt': val}}
    else:
        if not spec.get(flt, None) and flt != 'unique':
            return {flt: {'$exists': True}}
    return {}
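
A few sample filters and the MongoDB conditions they produce (assuming the pattern definitions sketched earlier; parse_filter_string belongs to the surrounding DAS code):

parse_filter({}, 'size>1')       # {'size': {'$gt': 1}}
parse_filter({}, 'nevents<=10')  # {'nevents': {'$lte': 10}}
parse_filter({}, 'dataset')      # {'dataset': {'$exists': True}}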
Example #12
    def parse(self, query):
        "Parse input query"
        spec = {}
        filters = {}
        aggregators = []
        fields = []
        keys = []
        pipe = []
        relaxed_query = relax(query, self.operators).split()
        if self.verbose:
            print("\n### input query=%s, relaxed=%s" % (query, relaxed_query))
        tot = len(relaxed_query)
        idx = 0
        while idx < tot:
            item = relaxed_query[idx]
            if self.verbose > 1:
                print("parse item", item)
            if item == '|':
                step = self.parse_pipe(relaxed_query[idx:], filters, aggregators)
                idx += step
            if item == ',':
                idx += 1
                continue
            next_elem = relaxed_query[idx+1] if idx+1 < tot else None
            next_next_elem = relaxed_query[idx+2] if idx+2 < tot else None
            if self.verbose > 1:
                print("### parse items", item, next_elem, next_next_elem)
            if next_elem and (next_elem == ',' or next_elem in self.daskeys):
                if item in self.daskeys:
                    fields.append(item)
                idx += 1
                continue
            elif next_elem in self.operators:
                oper = next_elem
                if item not in self.daskeys+self.specials:
                    error(relaxed_query, idx, 'Wrong DAS key')
                if next_next_elem.startswith('['):
                    val, step = parse_array(relaxed_query[idx:], next_elem, item)
                    spec.update(spec_entry(item, next_elem, val))
                    idx += step
                elif next_elem in ['in', 'between'] and \
                     not next_next_elem.startswith('['):
                    msg = '"%s" operator ' % next_elem
                    msg += 'should be followed by square bracket value'
                    error(relaxed_query, idx, msg)
                elif next_next_elem.startswith('"'):
                    val, step = parse_quotes(relaxed_query[idx:], '"')
                    spec.update(spec_entry(item, next_elem, val))
                    idx += step
                elif next_next_elem.startswith("'"):
                    val, step = parse_quotes(relaxed_query[idx:], "'")
                    spec.update(spec_entry(item, next_elem, val))
                    idx += step
                else:
                    if float_number_pattern.match(next_next_elem):
                        next_next_elem = float(next_next_elem)
                    elif int_number_pattern.match(next_next_elem) and \
                        not date_yyyymmdd_pattern.match(next_next_elem):
                        next_next_elem = int(next_next_elem)
                    elif next_next_elem in self.daskeys:
                        msg = 'daskey operator daskey structure is not allowed'
                        error(relaxed_query, idx, msg)
                    spec.update(spec_entry(item, next_elem, next_next_elem))
                    idx += 3
                continue
            elif item == '|':
                step = self.parse_pipe(relaxed_query[idx:], filters, aggregators)
                idx += step
            elif not next_elem and not next_next_elem:
                if item in self.daskeys:
                    fields.append(item)
                    idx += 1
                else:
                    error(relaxed_query, idx, 'Not a DAS key')
            else:
                error(relaxed_query, idx)
        out = {}
        for word in ['instance', 'system']:
            if word in spec:
                out[word] = spec.pop(word)
        if not fields:
            fields = [k for k in spec.keys() if k in self.daskeys]
            if len(fields) > 1:
                fields = None # ambiguous spec, we don't know which field to look-up
        if fields and not spec:
            error(relaxed_query, 0, 'No condition specified')
        out['fields'] = fields
        out['spec'] = spec
        # perform cross-check of filter values
        for key, item in filters.items():
            if key not in ['grep', 'sort']:
                continue
            for val in item:
                daskeyvalue_check(query, val, self.daskeys)
        # perform cross-check of aggregator values
        for _, val in aggregators:
            daskeyvalue_check(query, val, self.daskeys)
        if filters:
            out['filters'] = filters
        if aggregators:
            out['aggregators'] = aggregators

        if self.verbose:
            print("MongoDB query: %s" % out)
        return out
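
The parser returns a plain dictionary with 'fields' and 'spec' entries, plus optional 'filters', 'aggregators', 'instance', and 'system' entries, i.e. everything needed to assemble the corresponding MongoDB query.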
Example #13
    def parse_helper(self, query):
        "Parse input query"
        spec = {}
        filters = {}
        aggregators = []
        fields = []
        keys = []
        pipe = []
        relaxed_query = relax(query, self.operators).split()
        if self.verbose:
            print("\n### input query=%s, relaxed=%s" % (query, relaxed_query))
        tot = len(relaxed_query)
        idx = 0
        while idx < tot:
            item = relaxed_query[idx]
            if self.verbose > 1:
                print("parse item", item)
            if item == '|':
                step = self.parse_pipe(relaxed_query[idx:], filters,
                                       aggregators)
                idx += step
            if item == ',':
                idx += 1
                continue
            next_elem = relaxed_query[idx + 1] if idx + 1 < tot else None
            next_next_elem = relaxed_query[idx + 2] if idx + 2 < tot else None
            if self.verbose > 1:
                print("### parse items", item, next_elem, next_next_elem)
            if next_elem and (next_elem == ',' or next_elem in self.daskeys):
                if item in self.daskeys:
                    fields.append(item)
                idx += 1
                continue
            elif next_elem in self.operators:
                oper = next_elem
                if item not in self.daskeys + self.specials:
                    error(relaxed_query, idx, 'Wrong DAS key')
                if next_next_elem.startswith('['):
                    val, step = parse_array(relaxed_query[idx:], next_elem,
                                            item)
                    spec.update(spec_entry(item, next_elem, val))
                    idx += step
                elif next_elem in ['in', 'between'] and \
                     not next_next_elem.startswith('['):
                    msg = '"%s" operator ' % next_elem
                    msg += 'should be followed by square bracket value'
                    error(relaxed_query, idx, msg)
                elif next_next_elem.startswith('"'):
                    val, step = parse_quotes(relaxed_query[idx:], '"')
                    spec.update(spec_entry(item, next_elem, val))
                    idx += step
                elif next_next_elem.startswith("'"):
                    val, step = parse_quotes(relaxed_query[idx:], "'")
                    spec.update(spec_entry(item, next_elem, val))
                    idx += step
                else:
                    if float_number_pattern.match(next_next_elem):
                        next_next_elem = float(next_next_elem)
                    elif int_number_pattern.match(next_next_elem) and \
                        not date_yyyymmdd_pattern.match(next_next_elem):
                        next_next_elem = int(next_next_elem)
                    elif next_next_elem in self.daskeys:
                        msg = 'daskey operator daskey structure is not allowed'
                        error(relaxed_query, idx, msg)
                    spec.update(spec_entry(item, next_elem, next_next_elem))
                    idx += 3
                continue
            elif item == '|':
                step = self.parse_pipe(relaxed_query[idx:], filters,
                                       aggregators)
                idx += step
            elif not next_elem and not next_next_elem:
                if item in self.daskeys:
                    fields.append(item)
                    idx += 1
                else:
                    error(relaxed_query, idx, 'Not a DAS key')
            else:
                error(relaxed_query, idx)
        out = {}
        for word in ['instance', 'system']:
            if word in spec:
                out[word] = spec.pop(word)
        if not fields:
            fields = [k for k in spec.keys() if k in self.daskeys]
            if len(fields) > 1:
                fields = None  # ambiguous spec, we don't know which field to look-up
        if fields and not spec:
            error(relaxed_query, 0, 'No condition specified')
        out['fields'] = fields
        out['spec'] = spec
        # perform cross-check of filter values
        for key, item in filters.items():
            if key not in ['grep', 'sort']:
                continue
            for val in item:
                daskeyvalue_check(query, val, self.daskeys)
        # perform cross-check of aggregator values
        for _, val in aggregators:
            daskeyvalue_check(query, val, self.daskeys)
        if filters:
            out['filters'] = filters
        if aggregators:
            out['aggregators'] = aggregators

        if self.verbose:
            print("MongoDB query: %s" % out)
        return out