Beispiel #1
0
    def test_adjust_value(self):
        """Check adjust_value on numeric, date-like and null input strings."""
        # (raw input string, expected converted value), verified in order
        cases = (
            ("0", 0),
            ("1", 1),
            ("1.1", 1.1),
            ("-1.1", -1.1),
            # a timestamp-looking string is expected back unchanged
            ("2009.05.19 17:41:25", '2009.05.19 17:41:25'),
            # both spellings of null are expected to become None
            ("null", None),
            ("(null)", None),
        )
        for raw, expect in cases:
            self.assertEqual(expect, adjust_value(raw))
Beispiel #2
0
    def test_adjust_value(self):
        """Verify the conversions performed by adjust_value, case by case."""
        check = self.assertEqual

        # integer strings become ints
        check(0, adjust_value("0"))
        check(1, adjust_value("1"))

        # decimal strings, positive and negative, become floats
        check(1.1, adjust_value("1.1"))
        check(-1.1, adjust_value("-1.1"))

        # a timestamp-like string is left as it is
        check('2009.05.19 17:41:25', adjust_value("2009.05.19 17:41:25"))

        # null markers are mapped to None
        check(None, adjust_value("null"))
        check(None, adjust_value("(null)"))
Beispiel #3
0
 def parser(self, source, api):
     """
     RunSummary data-service parser.

     Incrementally parse the XML read from ``source`` and yield one
     ``{'run': row}`` dictionary for every ``runInfo`` element. The
     ``cmsdb`` element is skipped. An element that has children is stored
     in the row under its own tag as a dict of ``child tag -> value``; an
     element without children, seen while no such nested element is open,
     is stored under the key returned by ``self.dasmapping.notation2das``.
     All values go through ``adjust_value``.

     ``source`` must be a file-like object; it is closed when the generator
     finishes, fails, or is closed before being exhausted. ``api`` is only
     passed on to ``notation2das``.
     """
     row = {}
     hold = None  # tag of the nested element whose children were consumed
     root = None
     try:
         for event, elem in ET.iterparse(source, events=("start", "end")):
             if event == "start" and root is None:
                 root = elem  # the first element is root
             if elem.tag == 'cmsdb':
                 continue
             if event == 'start' and elem.tag == 'runInfo':
                 continue
             if event == 'end' and elem.tag == 'runInfo':
                 yield dict(run=row)
                 row = {}
                 elem.clear()  # release the memory held by the finished run
             if hold and event == 'end' and elem.tag == hold:
                 hold = None
                 continue
             if event == 'start':
                 # NOTE(review): ElementTree does not guarantee that the
                 # children or text are populated on a "start" event; this
                 # relies on the parser having buffered them already.
                 # list(elem) replaces Element.getchildren(), which was
                 # removed in Python 3.9.
                 children = list(elem)
                 # I don't apply notation conversion to all children
                 # since those are not likely to overlap
                 if children:
                     sub = {}
                     for child in children:
                         sub[child.tag] = adjust_value(child.text)
                     row[elem.tag] = sub
                     hold = elem.tag
                 elif not hold:
                     nkey = self.dasmapping.notation2das(
                         self.name, elem.tag, api)
                     row[nkey] = adjust_value(elem.text)
     finally:
         # An Element is falsy when it has no children, so compare with
         # None explicitly instead of truth-testing it.
         if root is not None:
             root.clear()
         source.close()
Beispiel #4
0
 def parser(self, source, api):
     """
     RunSummary data-service parser.

     Incrementally parse the XML read from ``source`` and yield one
     ``{'run': row}`` dictionary for every ``runInfo`` element. The
     ``cmsdb`` element is skipped. An element that has children is stored
     in the row under its own tag as a dict of ``child tag -> value``; an
     element without children, seen while no such nested element is open,
     is stored under the key returned by ``self.dasmapping.notation2das``.
     All values go through ``adjust_value``.

     ``source`` must be a file-like object; it is closed when the generator
     finishes, fails, or is closed before being exhausted. ``api`` is only
     passed on to ``notation2das``.
     """
     row = {}
     hold = None  # tag of the nested element whose children were consumed
     root = None
     try:
         for event, elem in ET.iterparse(source, events=("start", "end")):
             if event == "start" and root is None:
                 root = elem  # the first element is root
             if elem.tag == 'cmsdb':
                 continue
             if event == 'start' and elem.tag == 'runInfo':
                 continue
             if event == 'end' and elem.tag == 'runInfo':
                 yield dict(run=row)
                 row = {}
                 elem.clear()  # release the memory held by the finished run
             if hold and event == 'end' and elem.tag == hold:
                 hold = None
                 continue
             if event == 'start':
                 # NOTE(review): ElementTree does not guarantee that the
                 # children or text are populated on a "start" event; this
                 # relies on the parser having buffered them already.
                 # list(elem) replaces Element.getchildren(), which was
                 # removed in Python 3.9.
                 children = list(elem)
                 # I don't apply notation conversion to all children
                 # since those are not likely to overlap
                 if children:
                     sub = {}
                     for child in children:
                         sub[child.tag] = adjust_value(child.text)
                     row[elem.tag] = sub
                     hold = elem.tag
                 elif not hold:
                     nkey = self.dasmapping.notation2das(
                         self.name, elem.tag, api)
                     row[nkey] = adjust_value(elem.text)
     finally:
         # An Element is falsy when it has no children, so compare with
         # None explicitly instead of truth-testing it.
         if root is not None:
             root.clear()
         source.close()
Beispiel #5
0
def worker_v2(url, query):
    """
    Generator over RunRegistry records fetched through XML-RPC.

    API documentation:
    https://twiki.cern.ch/twiki/bin/viewauth/CMS/DqmRrApi
    Example endpoint: url=http://pccmsdqm04.cern.ch/runregistry/xmlrpc

    A string query is sent to RunLumiSectionRangeTable.exportJson and every
    element of the decoded JSON reply is yielded as is. A dict query is
    sent to RunDatasetTable.export in 'tsv_runs' format and every data line
    of the tab-separated reply is yielded as {'run': record}. A failing
    remote call is treated as an empty reply; any other query type yields
    nothing.
    """
    proxy = xmlrpclib.ServerProxy(url)
    workspace = 'GLOBAL'

    is_text = isinstance(query, str) or isinstance(query, unicode)
    if is_text:
        try:
            reply = proxy.RunLumiSectionRangeTable.exportJson(workspace, query)
        except Exception:
            reply = "{}"  # empty response
        for item in json.loads(reply):
            yield item
        return

    if not isinstance(query, dict):
        return

    # other export formats are xml_all, csv_runs
    try:
        reply = proxy.RunDatasetTable.export(workspace, 'tsv_runs', query)
    except Exception:
        reply = ""  # empty response

    columns = []
    for line in reply.split('\n'):
        if not line:
            continue
        # the field after the last tab is dropped on every line
        cells = line.split('\t')[:-1]
        if not columns:
            # first usable line carries the column titles
            for name in cells:
                name = name.lower()
                if name != 'run_number':
                    name = name.replace('run_', '')
                columns.append(name)
            continue
        if len(cells) != len(columns):
            continue  # skip lines that do not match the header width
        record = {}
        for name, cell in zip(columns, cells):
            record[name] = adjust_value(cell)
        yield dict(run=record)
Beispiel #6
0
    def requestquery(self, query, add_to_analytics=True):
        """
        Query analyzer which forms a MongoDB request query from a free
        text-based DAS query.

        The input is either a query expression, optionally followed by
        ``|``-separated filter, aggregator or map-reduce parts, or a JSON
        object that already is a MongoDB query, i.e. has exactly the keys
        ``fields`` and ``spec``; such an object is returned as parsed.

        :param query: query text
        :param add_to_analytics: when true, the query and the resulting
            MongoDB query are passed to ``self.analytics.add_query``
        :return: dict with ``fields`` and ``spec`` keys plus, when found in
            the input, ``mapreduce``, ``filters`` and ``aggregators``
        :raises Exception: on a malformed filter, a JSON query with other
            keys than ``fields``/``spec``, a missing ``[`` or ``]`` in an
            ``in``/``nin`` condition, or an unparsable date range
        """
        # strip operators while we will match words against them
        operators = [o.strip() for o in self.operators]

        # find out if input query contains filters/mapreduce functions
        mapreduce = []
        filters = []
        aggregators = []
        pat = re.compile(r"^([a-z_]+\.?)+$")  # match key.attrib
        # isinstance(..., str) instead of types.StringType / bytes so that
        # the check also matches text queries on Python 3
        if query and isinstance(query, str):
            if "|" in query:
                split_results = query.split("|")
                query = split_results[0]
                for item in split_results[1:]:
                    func = item.split("(")[0].strip()
                    for filter_name in self.filters:
                        if filter_name not in item:
                            continue
                        for elem in item.replace(filter_name, '').split(','):
                            dasfilter = elem.strip()
                            if not dasfilter:
                                continue
                            if not pat.match(dasfilter):
                                msg = 'Incorrect filter: %s' % dasfilter
                                raise Exception(msg)
                            if dasfilter not in filters:
                                filters.append(dasfilter)
                    # anything that is not an aggregator call, filter parts
                    # included, is kept as a map-reduce item
                    if func in self.aggregators:
                        aggregators = list(get_aggregator(item))
                    else:
                        mapreduce.append(item)
            query = query.strip()
            # startswith/endswith tolerate an empty query (nothing before
            # the first "|"), where indexing query[0] would fail
            if query.startswith("{") and query.endswith("}"):
                mongo_query = json.loads(query)
                # compare sorted keys: key order is not significant and
                # keys() is not a list on Python 3
                if sorted(mongo_query.keys()) != ['fields', 'spec']:
                    raise Exception("Invalid MongoDB query %s" % query)
                if add_to_analytics:
                    self.analytics.add_query(query, mongo_query)
                return mongo_query

        # check input query and prepare it for processing
        findbracketobj(query)  # check brackets in a query
        skeys = []
        query = query.strip().replace(",", " ")
        query = add_spaces(query, operators)
        slist = query.split()
        idx = 0

        # main loop, step over words in query expression and
        # find out selection keys and conditions
        condlist = []
        while idx < len(slist):
            word = slist[idx].strip()
            if word in self.daskeys:  # look-up for selection keys
                # a key followed by an operator is a condition, not a
                # selection key
                try:
                    next_word = slist[idx + 1]
                except IndexError:
                    pass
                else:
                    if next_word not in operators and word not in skeys:
                        skeys.append(word)
                if word == slist[-1] and word not in skeys:  # last word
                    skeys.append(word)
            elif word in operators:  # look-up conditions
                oper = word
                # NOTE: for an operator in first position idx - 1 wraps
                # around to the last word; for an operator in last position
                # idx + 1 raises IndexError.
                prev_word = slist[idx - 1]
                next_word = slist[idx + 1]
                if word in ['in', 'nin']:
                    if '[' not in next_word:
                        msg = 'No open bracket [ found in query expression'
                        raise Exception(msg)
                    arr = []
                    found_last = False
                    # NOTE(review): collection does not stop at the closing
                    # bracket, every remaining word ends up in the list.
                    for item in slist[idx + 1:]:
                        if ']' in item:
                            found_last = True
                        val = item.replace('[', '').replace(']', '')
                        if val:
                            arr.append(val)
                    if not found_last:
                        msg = 'No closed bracket ] found in query expression'
                        raise Exception(msg)
                    value = arr
                elif word == 'last':
                    value = convert2date(next_word)
                    condlist.append(dict(key='date', op='in', value=value))
                    value = None
                else:
                    value = next_word
                if prev_word == 'date':
                    if word != 'last':  # we already converted date
                        if isinstance(value, str):
                            value = [das_dateformat(value), time.time()]
                        elif isinstance(value, list):
                            try:
                                value = [das_dateformat(value[0]),
                                         das_dateformat(value[1])]
                            except Exception:
                                msg = "Unable to parse %s" % value
                                raise Exception(msg)
                    # NOTE(review): for "date last ..." this appends a second
                    # date condition whose value is None.
                    condlist.append(dict(key='date', op='in', value=value))
                    value = None
                idx += 1  # step over the operand
                if not value:
                    # date conditions were stored above, nothing else to map
                    continue
                key = prev_word
                value = adjust_value(value)
                for system in self.map.list_systems():
                    mapkey = self.map.find_mapkey(system, key)
                    if mapkey:
                        cdict = dict(key=mapkey, op=oper, value=value)
                        if cdict not in condlist:
                            condlist.append(cdict)
            idx += 1
        if not condlist and skeys:  # e.g. --query="dataset"
            for key in skeys:
                for system, daskeys in self.map.daskeys().items():
                    if key in daskeys:
                        mapkey = self.map.find_mapkey(system, key)
                        condlist.append(dict(key=mapkey, op="=", value="*"))
                        break
        spec = mongo_exp(condlist)
        fields = skeys if skeys else None
        mongo_query = dict(fields=fields, spec=spec)
        # add the optional parts only if they exist
        if mapreduce:
            mongo_query['mapreduce'] = mapreduce
        if filters:
            mongo_query['filters'] = filters
        if aggregators:
            mongo_query['aggregators'] = aggregators
        if add_to_analytics:
            self.analytics.add_query(query, mongo_query)
        return mongo_query
Beispiel #7
0
    def requestquery(self, query, add_to_analytics=True):
        """
        Query analyzer which forms a MongoDB request query from a free
        text-based DAS query.

        The input is either a query expression, optionally followed by
        ``|``-separated filter, aggregator or map-reduce parts, or a JSON
        object that already is a MongoDB query, i.e. has exactly the keys
        ``fields`` and ``spec``; such an object is returned as parsed.

        :param query: query text
        :param add_to_analytics: when true, the query and the resulting
            MongoDB query are passed to ``self.analytics.add_query``
        :return: dict with ``fields`` and ``spec`` keys plus, when found in
            the input, ``mapreduce``, ``filters`` and ``aggregators``
        :raises Exception: on a malformed filter, a JSON query with other
            keys than ``fields``/``spec``, a missing ``[`` or ``]`` in an
            ``in``/``nin`` condition, or an unparsable date range
        """
        # strip operators while we will match words against them
        operators = [o.strip() for o in self.operators]

        # find out if input query contains filters/mapreduce functions
        mapreduce = []
        filters = []
        aggregators = []
        pat = re.compile(r"^([a-z_]+\.?)+$")  # match key.attrib
        # a text query is a str; comparing its type with bytes never
        # matches on Python 3 and skipped this whole section
        if query and isinstance(query, str):
            if "|" in query:
                split_results = query.split("|")
                query = split_results[0]
                for item in split_results[1:]:
                    func = item.split("(")[0].strip()
                    for filter_name in self.filters:
                        if filter_name not in item:
                            continue
                        for elem in item.replace(filter_name, '').split(','):
                            dasfilter = elem.strip()
                            if not dasfilter:
                                continue
                            if not pat.match(dasfilter):
                                msg = 'Incorrect filter: %s' % dasfilter
                                raise Exception(msg)
                            if dasfilter not in filters:
                                filters.append(dasfilter)
                    # anything that is not an aggregator call, filter parts
                    # included, is kept as a map-reduce item
                    if func in self.aggregators:
                        aggregators = list(get_aggregator(item))
                    else:
                        mapreduce.append(item)
            query = query.strip()
            # startswith/endswith tolerate an empty query (nothing before
            # the first "|"), where indexing query[0] would fail
            if query.startswith("{") and query.endswith("}"):
                mongo_query = json.loads(query)
                # compare sorted keys: key order is not significant and a
                # keys() view never equals a list on Python 3
                if sorted(mongo_query.keys()) != ['fields', 'spec']:
                    raise Exception("Invalid MongoDB query %s" % query)
                if add_to_analytics:
                    self.analytics.add_query(query, mongo_query)
                return mongo_query

        # check input query and prepare it for processing
        findbracketobj(query)  # check brackets in a query
        skeys = []
        query = query.strip().replace(",", " ")
        query = add_spaces(query, operators)
        slist = query.split()
        idx = 0

        # main loop, step over words in query expression and
        # find out selection keys and conditions
        condlist = []
        while idx < len(slist):
            word = slist[idx].strip()
            if word in self.daskeys:  # look-up for selection keys
                # a key followed by an operator is a condition, not a
                # selection key
                try:
                    next_word = slist[idx + 1]
                except IndexError:
                    pass
                else:
                    if next_word not in operators and word not in skeys:
                        skeys.append(word)
                if word == slist[-1] and word not in skeys:  # last word
                    skeys.append(word)
            elif word in operators:  # look-up conditions
                oper = word
                # NOTE: for an operator in first position idx - 1 wraps
                # around to the last word; for an operator in last position
                # idx + 1 raises IndexError.
                prev_word = slist[idx - 1]
                next_word = slist[idx + 1]
                if word in ['in', 'nin']:
                    if '[' not in next_word:
                        msg = 'No open bracket [ found in query expression'
                        raise Exception(msg)
                    arr = []
                    found_last = False
                    # NOTE(review): collection does not stop at the closing
                    # bracket, every remaining word ends up in the list.
                    for item in slist[idx + 1:]:
                        if ']' in item:
                            found_last = True
                        val = item.replace('[', '').replace(']', '')
                        if val:
                            arr.append(val)
                    if not found_last:
                        msg = 'No closed bracket ] found in query expression'
                        raise Exception(msg)
                    value = arr
                elif word == 'last':
                    value = convert2date(next_word)
                    condlist.append(dict(key='date', op='in', value=value))
                    value = None
                else:
                    value = next_word
                if prev_word == 'date':
                    if word != 'last':  # we already converted date
                        if isinstance(value, str):
                            value = [das_dateformat(value), time.time()]
                        elif isinstance(value, list):
                            try:
                                value = [das_dateformat(value[0]),
                                         das_dateformat(value[1])]
                            except Exception:
                                msg = "Unable to parse %s" % value
                                raise Exception(msg)
                    # NOTE(review): for "date last ..." this appends a second
                    # date condition whose value is None.
                    condlist.append(dict(key='date', op='in', value=value))
                    value = None
                idx += 1  # step over the operand
                if not value:
                    # date conditions were stored above, nothing else to map
                    continue
                key = prev_word
                value = adjust_value(value)
                for system in self.map.list_systems():
                    mapkey = self.map.find_mapkey(system, key)
                    if mapkey:
                        cdict = dict(key=mapkey, op=oper, value=value)
                        if cdict not in condlist:
                            condlist.append(cdict)
            idx += 1
        if not condlist and skeys:  # e.g. --query="dataset"
            for key in skeys:
                for system, daskeys in self.map.daskeys().items():
                    if key in daskeys:
                        mapkey = self.map.find_mapkey(system, key)
                        condlist.append(dict(key=mapkey, op="=", value="*"))
                        break
        spec = mongo_exp(condlist)
        fields = skeys if skeys else None
        mongo_query = dict(fields=fields, spec=spec)
        # add the optional parts only if they exist
        if mapreduce:
            mongo_query['mapreduce'] = mapreduce
        if filters:
            mongo_query['filters'] = filters
        if aggregators:
            mongo_query['aggregators'] = aggregators
        if add_to_analytics:
            self.analytics.add_query(query, mongo_query)
        return mongo_query