def test_adjust_value(self):
    """Test adjust_value"""
    # (input string, expected converted value) — same cases, same order
    # as the original assertions.
    cases = [
        ("0", 0),
        ("1", 1),
        ("1.1", 1.1),
        ("-1.1", -1.1),
        ("2009.05.19 17:41:25", '2009.05.19 17:41:25'),  # non-numeric stays a string
        ("null", None),
        ("(null)", None),
    ]
    for given, expect in cases:
        result = adjust_value(given)
        self.assertEqual(expect, result)
def parser(self, source, api):
    """
    RunSummary data-service parser.

    Incrementally parse the XML *source* stream with ET.iterparse and
    yield one ``dict(run=row)`` per closed ``runInfo`` element.

    :param source: file-like object with the XML payload; closed on exit.
    :param api: API name passed through to the notation mapping.
    """
    row = {}
    # ``hold`` remembers the tag of an element whose children were already
    # consumed as a sub-dict, so its own "end" event is skipped.
    hold = None
    context = ET.iterparse(source, events=("start", "end"))
    root = None
    for item in context:
        event, elem = item
        if event == "start" and root is None:
            root = elem  # the first element is root
        if elem.tag == 'cmsdb':
            continue
        if event == 'start' and elem.tag == 'runInfo':
            continue
        if event == 'end' and elem.tag == 'runInfo':
            # a full run record is complete; emit it and reset
            yield dict(run=row)
            row = {}
            elem.clear()
        if hold and event == 'end' and elem.tag == hold:
            hold = None
            continue
        if event == 'start':
            sub = {}
            # list(elem) replaces elem.getchildren(), which was deprecated
            # and removed in Python 3.9; behavior is identical.
            children = list(elem)
            # I don't apply notation conversion to all children
            # since those are not likely to overlap
            if children:
                for child in children:
                    sub[child.tag] = adjust_value(child.text)
                row[elem.tag] = sub
                hold = elem.tag
            else:
                if not hold:
                    nkey = self.dasmapping.notation2das\
                    (self.name, elem.tag, api)
                    row[nkey] = adjust_value(elem.text)
    if root:
        root.clear()
    source.close()
def worker_v2(url, query):
    """
    Query RunRegistry service, see documentation at
    https://twiki.cern.ch/twiki/bin/viewauth/CMS/DqmRrApi
    url=http://pccmsdqm04.cern.ch/runregistry/xmlrpc
    """
    server = xmlrpclib.ServerProxy(url)
    namespace = 'GLOBAL'
    # string query -> JSON lumi-section export; dict query -> TSV run export
    if isinstance(query, str) or isinstance(query, unicode):
        try:
            payload = server.RunLumiSectionRangeTable.exportJson(namespace, query)
        except Exception as _err:
            payload = "{}"  # empty response
        for row in json.loads(payload):
            yield row
    elif isinstance(query, dict):
        iformat = 'tsv_runs'  # other formats are xml_all, csv_runs
        try:
            payload = server.RunDatasetTable.export(namespace, iformat, query)
        except Exception as _err:
            payload = ""  # empty response
        titles = []
        for tsv_line in payload.split('\n'):
            if not tsv_line:
                continue
            if not titles:
                # first non-empty line carries the column headers
                for title in tsv_line.split('\t')[:-1]:
                    title = title.lower()
                    if title != 'run_number':
                        title = title.replace('run_', '')
                    titles.append(title)
                continue
            cells = tsv_line.split('\t')[:-1]
            if len(cells) != len(titles):
                continue  # malformed row, skip it
            record = {}
            for name, cell in zip(titles, cells):
                record[name] = adjust_value(cell)
            yield dict(run=record)
def requestquery(self, query, add_to_analytics=True):
    """
    Query analyzer which form request query to DAS from a free text-based form.
    Return MongoDB request query.
    """
    # strip operators while we will match words against them
    operators = [oper.strip() for oper in self.operators]
    # find out if input query contains filters/mapreduce functions
    mapreduce = []
    filters = []
    aggregators = []
    pat = re.compile(r"^([a-z_]+\.?)+$")  # match key.attrib
    if query and type(query) is types.StringType:
        if query.find("|") != -1:
            # everything after the first pipe is filters/aggregators/mapreduce
            pieces = query.split("|")
            query = pieces[0]
            for item in pieces[1:]:
                func = item.split("(")[0].strip()
                for fltr in self.filters:
                    if item.find(fltr) == -1:
                        continue
                    for elem in item.replace(fltr, '').split(','):
                        dasfilter = elem.strip()
                        if not dasfilter:
                            continue
                        if not pat.match(dasfilter):
                            msg = 'Incorrect filter: %s' % dasfilter
                            raise Exception(msg)
                        if dasfilter not in filters:
                            filters.append(dasfilter)
                if func in self.aggregators:
                    aggregators = [agg for agg in get_aggregator(item)]
                else:
                    mapreduce.append(item)
            # mapreduce = [i.strip() for i in split_results[1:]]
    query = query.strip()
    if query[0] == "{" and query[-1] == "}":
        # raw MongoDB query passed directly as JSON
        mongo_query = json.loads(query)
        if mongo_query.keys() != ['fields', 'spec']:
            raise Exception("Invalid MongoDB query %s" % query)
        if add_to_analytics:
            self.analytics.add_query(query, mongo_query)
        return mongo_query
    # check input query and prepare it for processing
    findbracketobj(query)  # check brackets in a query
    skeys = []
    query = query.strip().replace(",", " ")
    query = add_spaces(query, operators)
    slist = query.split()
    # main loop, step over words in query expression and
    # findout selection keys and conditions
    condlist = []
    idx = 0
    while idx < len(slist):
        word = slist[idx].strip()
        if word in self.daskeys:  # look-up for selection keys
            try:
                next_word = slist[idx + 1]
                if next_word not in operators and word not in skeys:
                    skeys.append(word)
            except:
                pass
            if word == slist[-1] and word not in skeys:  # last word
                skeys.append(word)
        elif word in operators:  # look-up conditions
            oper = word
            prev_word = slist[idx - 1]
            next_word = slist[idx + 1]
            if word in ['in', 'nin']:
                # collect the bracketed array following in/nin
                first = next_word
                if first.find('[') == -1:
                    msg = 'No open bracket [ found in query expression'
                    raise Exception(msg)
                arr = []
                found_last = False
                for item in slist[idx + 1:]:
                    if item.find(']') != -1:
                        found_last = True
                    val = item.replace('[', '').replace(']', '')
                    if val:
                        arr.append(val)
                if not found_last:
                    msg = 'No closed bracket ] found in query expression'
                    raise Exception(msg)
                value = arr
            elif word == 'last':
                value = convert2date(next_word)
                cdict = dict(key='date', op='in', value=value)
                condlist.append(cdict)
                value = None
            else:
                value = next_word
            if prev_word == 'date':
                if word != 'last':  # we already converted date
                    if type(value) is types.StringType:
                        value = [das_dateformat(value), time.time()]
                    elif type(value) is types.ListType:
                        try:
                            value1 = das_dateformat(value[0])
                            value2 = das_dateformat(value[1])
                            value = [value1, value2]
                        except:
                            msg = "Unable to parse %s" % value
                            raise Exception(msg)
                    cdict = dict(key='date', op='in', value=value)
                    condlist.append(cdict)
                    value = None
            idx += 1
            if not value:
                continue
            key = prev_word
            value = adjust_value(value)
            if key == 'date':
                cdict = dict(key=key, op=oper, value=value)
                condlist.append(cdict)
                continue
            # map the free-text key onto per-system keys
            for system in self.map.list_systems():
                mapkey = self.map.find_mapkey(system, key)
                if mapkey:
                    cdict = dict(key=mapkey, op=oper, value=value)
                    if cdict not in condlist:
                        condlist.append(cdict)
        else:
            if word not in skeys and word in self.daskeys:
                skeys.append(word)
        idx += 1
    if not condlist and skeys:  # e.g. --query="dataset"
        # no explicit conditions: match everything for each selection key
        for key in skeys:
            for system, daskeys in self.map.daskeys().items():
                if key in daskeys:
                    mapkey = self.map.find_mapkey(system, key)
                    cdict = dict(key=mapkey, op="=", value="*")
                    condlist.append(cdict)
                    break
    # print "\n### condlist", condlist
    spec = mongo_exp(condlist)
    # print "### spec", spec
    if skeys:
        fields = skeys
    else:
        fields = None
    mongo_query = dict(fields=fields, spec=spec)
    # add mapreduce if it exists
    if mapreduce:
        mongo_query['mapreduce'] = mapreduce
    if filters:
        mongo_query['filters'] = filters
    if aggregators:
        mongo_query['aggregators'] = aggregators
    if add_to_analytics:
        self.analytics.add_query(query, mongo_query)
    return mongo_query
def requestquery(self, query, add_to_analytics=True):
    """
    Query analyzer which form request query to DAS from a free text-based form.
    Return MongoDB request query.

    :param query: free-text DAS query, or a raw ``{...}`` JSON MongoDB query.
    :param add_to_analytics: when True, record the query via self.analytics.
    :returns: dict with ``fields``/``spec`` plus optional ``mapreduce``,
        ``filters``, ``aggregators`` keys.
    :raises Exception: on malformed filters, brackets or date values.
    """
    # strip operators while we will match words against them
    operators = [o.strip() for o in self.operators]
    # find out if input query contains filters/mapreduce functions
    mapreduce = []
    filters = []
    aggregators = []
    pat = re.compile(r"^([a-z_]+\.?)+$")  # match key.attrib
    # NOTE: the 2to3-converted check was ``type(query) is bytes``, which is
    # never true for a Python 3 text query and silently skipped this whole
    # branch; on Python 3 queries are str.
    if query and isinstance(query, str):
        if query.find("|") != -1:
            split_results = query.split("|")
            query = split_results[0]
            for item in split_results[1:]:
                func = item.split("(")[0].strip()
                for fltr in self.filters:  # renamed: don't shadow builtin filter
                    if item.find(fltr) == -1:
                        continue
                    for elem in item.replace(fltr, '').split(','):
                        dasfilter = elem.strip()
                        if not dasfilter:
                            continue
                        if not pat.match(dasfilter):
                            msg = 'Incorrect filter: %s' % dasfilter
                            raise Exception(msg)
                        if dasfilter not in filters:
                            filters.append(dasfilter)
                if func in self.aggregators:
                    aggregators = [agg for agg in get_aggregator(item)]
                else:
                    mapreduce.append(item)
            # mapreduce = [i.strip() for i in split_results[1:]]
    query = query.strip()
    if query[0] == "{" and query[-1] == "}":
        mongo_query = json.loads(query)
        # compare as sets: on Python 3 dict_keys never equals a list, so the
        # original ``mongo_query.keys() != ['fields', 'spec']`` rejected
        # every valid query; order must not matter either.
        if set(mongo_query.keys()) != {'fields', 'spec'}:
            raise Exception("Invalid MongoDB query %s" % query)
        if add_to_analytics:
            self.analytics.add_query(query, mongo_query)
        return mongo_query
    # check input query and prepare it for processing
    findbracketobj(query)  # check brackets in a query
    skeys = []
    query = query.strip().replace(",", " ")
    query = add_spaces(query, operators)
    slist = query.split()
    idx = 0
    # main loop, step over words in query expression and
    # findout selection keys and conditions
    condlist = []
    while True:
        if idx >= len(slist):
            break
        word = slist[idx].strip()
        if word in self.daskeys:  # look-up for selection keys
            try:
                next_word = slist[idx + 1]
                if next_word not in operators and word not in skeys:
                    skeys.append(word)
            except IndexError:
                pass  # word is the last token; handled below
            if word == slist[-1] and word not in skeys:  # last word
                skeys.append(word)
        elif word in operators:  # look-up conditions
            oper = word
            prev_word = slist[idx - 1]
            next_word = slist[idx + 1]
            if word in ['in', 'nin']:
                first = next_word
                if first.find('[') == -1:
                    msg = 'No open bracket [ found in query expression'
                    raise Exception(msg)
                arr = []
                found_last = False
                for item in slist[idx + 1:]:
                    if item.find(']') != -1:
                        found_last = True
                    val = item.replace('[', '').replace(']', '')
                    if val:
                        arr.append(val)
                if not found_last:
                    msg = 'No closed bracket ] found in query expression'
                    raise Exception(msg)
                value = arr
            elif word == 'last':
                value = convert2date(next_word)
                cdict = dict(key='date', op='in', value=value)
                condlist.append(cdict)
                value = None
            else:
                value = next_word
            if prev_word == 'date':
                if word != 'last':  # we already converted date
                    # on Python 3 a scalar date value is str, not bytes
                    if isinstance(value, str):
                        value = [das_dateformat(value), time.time()]
                    elif isinstance(value, list):
                        try:
                            value1 = das_dateformat(value[0])
                            value2 = das_dateformat(value[1])
                            value = [value1, value2]
                        except Exception:
                            msg = "Unable to parse %s" % value
                            raise Exception(msg)
                    cdict = dict(key='date', op='in', value=value)
                    condlist.append(cdict)
                    value = None
            idx += 1
            if not value:
                continue
            key = prev_word
            value = adjust_value(value)
            if key == 'date':
                cdict = dict(key=key, op=oper, value=value)
                condlist.append(cdict)
                continue
            for system in self.map.list_systems():
                mapkey = self.map.find_mapkey(system, key)
                if mapkey:
                    cdict = dict(key=mapkey, op=oper, value=value)
                    if cdict not in condlist:
                        condlist.append(cdict)
        else:
            if word not in skeys and word in self.daskeys:
                skeys.append(word)
        idx += 1
    if not condlist and skeys:  # e.g. --query="dataset"
        for key in skeys:
            for system, daskeys in self.map.daskeys().items():
                if key in daskeys:
                    mapkey = self.map.find_mapkey(system, key)
                    cdict = dict(key=mapkey, op="=", value="*")
                    condlist.append(cdict)
                    break
    spec = mongo_exp(condlist)
    if skeys:
        fields = skeys
    else:
        fields = None
    mongo_query = dict(fields=fields, spec=spec)
    # add mapreduce if it exists
    if mapreduce:
        mongo_query['mapreduce'] = mapreduce
    if filters:
        mongo_query['filters'] = filters
    if aggregators:
        mongo_query['aggregators'] = aggregators
    if add_to_analytics:
        self.analytics.add_query(query, mongo_query)
    return mongo_query