def result(self, query, idx=0, limit=None):
    """
    Get results either from cache or from explicit call
    """
    self.logger.info('input query=%s' % query)
    results = []
    dasquery = DASQuery(query)
    query = dasquery.mongo_query
    # check if we have any service which covers the query,
    # otherwise decompose it into a list of queries
    service_map = dasquery.service_apis_map()
    if not service_map:
        msg = 'no APIs found to answer input query, will decompose it'
        self.logger.info(msg)
        skeys = query['fields']
        if not skeys:
            skeys = []
        for key in skeys:
            newquery = DASQuery(dict(fields=[key], spec=query['spec']))
            self.call(newquery)  # process query
    else:
        self.call(dasquery)  # process query
    # look up provided query in a cache
    if not self.noresults:
        results = self.get_from_cache(dasquery, idx, limit)
    return results
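# Hypothetical usage sketch for the result() entry point above; the DASCore
# construction arguments, the import path, and the query string are
# assumptions for illustration, not taken from the snippet itself.
from DAS.core.das_core import DASCore

dascore = DASCore(debug=0)
query = 'dataset dataset=/ZMM*/*/*'   # illustrative DAS-QL query
for row in dascore.result(query, idx=0, limit=10):
    print(row)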
def test_dasquery(self):
    """
    checks integration with DASQuery on the following cases:
     - no dataset matching
     - more than one interpretation available
     - only one interpretation (standard query execution)
    """
    # multiple interpretations
    try:
        DASQuery('dataset dataset=*Zmm*', instance=self.inst)
    except WildcardMultipleMatchesException:
        pass
    else:
        self.fail('expected WildcardMultipleMatchesException')
    # none (no such dataset)
    msg = ''
    try:
        DASQuery('dataset dataset=*Zmmdsjfdsjguuds*', instance=self.inst)
    except WildcardMatchingException as exc:
        msg = str(exc)
    self.assertTrue('pattern you specified did not match '
                    'any datasets in DAS cache' in msg)
    # DASQuery shall be parsed correctly
    results = False
    try:
        results = DASQuery('dataset dataset=/*Zmm*/*/*', instance=self.inst)
    except WildcardMatchingException:
        results = False
    self.assertTrue(results)
def result(self, query, idx=0, limit=None):
    """
    Get results either from cache or from explicit call
    """
    self.logger.info('input query=%s' % query)
    results = []
    dasquery = DASQuery(query)
    dasquery.add_to_analytics()
    query = dasquery.mongo_query
    # check if we have any service which covers the query,
    # otherwise decompose it into a list of queries
    service_map = dasquery.service_apis_map()
    if not service_map:
        msg = 'no APIs found to answer input query, will decompose it'
        self.logger.info(msg)
        skeys = query['fields']
        if not skeys:
            skeys = []
        for key in skeys:
            newquery = DASQuery(dict(fields=[key], spec=query['spec']))
            self.call(newquery)  # process query
    else:
        self.call(dasquery)  # process query
    # look up provided query in a cache
    if not self.noresults:
        results = self.get_from_cache(dasquery, idx, limit)
    return results
def test_query_representations(self):
    "Test different DAS query representations"
    iquery = 'file dataset=/a/b/c'
    pquery = 'file dataset.name=/a/b/c'  # query after parsing
    mquery = {'fields': ['file'], 'spec': {'dataset.name': '/a/b/c'}}
    squery = {'fields': ['file'],
              'spec': [{'key': 'dataset.name', 'value': '"/a/b/c"'}]}

    q1 = DASQuery(iquery)
    q2 = DASQuery(mquery)
    q3 = DASQuery(squery)

    self.assertEqual(iquery, q1.query)
    self.assertEqual(mquery, q1.mongo_query)
    self.assertEqual(squery, q1.storage_query)

    self.assertEqual(pquery, q2.query)  # should be equal to parsed query
    self.assertEqual(mquery, q2.mongo_query)
    self.assertEqual(squery, q2.storage_query)

    self.assertEqual(pquery, q3.query)  # should be equal to parsed query
    self.assertEqual(mquery, q3.mongo_query)
    self.assertEqual(squery, q3.storage_query)
def test_query_filters_aggregators(self):
    "Test DAS query with filters and aggregators"
    iquery = 'file dataset=/a/b/c | grep file.name,file.size'
    dquery = DASQuery(iquery)
    self.assertEqual(dquery, DASQuery(iquery))
    expect = ['file.name', 'file.size']
    filters = list(dquery.filters)
    self.assertEqual(expect, filters)
def generate_dasquery(self, uinput, inst, html_error=True):
    """
    Check provided input as valid DAS input query.
    Returns status and content (either error message or valid DASQuery)
    """
    def helper(msg, html_error=None):
        """Helper function which provides an error template"""
        if not html_error:
            return msg
        guide = self.templatepage('dbsql_vs_dasql',
                                  operators=', '.join(das_operators()))
        page = self.templatepage('das_ambiguous', msg=msg,
                                 base=self.base, guide=guide)
        return page
    if not uinput:
        return 1, helper('No input query')
    # Generate DASQuery object, if it fails we catch the exception and
    # wrap it for upper layer (web interface)
    try:
        dasquery = DASQuery(uinput, instance=inst)
    except Exception as err:
        return 1, helper(das_parser_error(uinput, str(err)), html_error)
    fields = dasquery.mongo_query.get('fields', [])
    if not fields:
        fields = []
    spec = dasquery.mongo_query.get('spec', {})
    for word in fields + spec.keys():
        found = 0
        if word in DAS_DB_KEYWORDS:
            found = 1
        for key in self.daskeys:
            if word.find(key) != -1:
                found = 1
        if not found:
            msg = 'Provided input does not contain a valid DAS key'
            return 1, helper(msg, html_error)
    if isinstance(uinput, dict):  # DASQuery w/ {'spec':{'_id:id}}
        pass
    elif uinput.find('queries') != -1:
        pass
    elif uinput.find('records') != -1:
        pass
    else:  # normal user DAS query
        try:
            service_map = dasquery.service_apis_map()
        except Exception as exc:
            msg = 'Fail to look up DASQuery service API map'
            print msg
            print_exc(exc)
            return 1, helper(msg, html_error)
        if not service_map:
            msg = "None of the APIs registered in DAS "
            msg += "can resolve this query"
            return 1, helper(msg, html_error)
    return 0, dasquery
def test_query_properties(self):
    "Test DAS query properties"
    iquery = 'file dataset=/a/b/c instance=global'
    query = DASQuery(iquery)
    self.assertEqual('global', query.instance)

    iquery = 'file dataset=/a/b/c system=dbs'
    query = DASQuery(iquery)
    self.assertEqual('dbs', query.system)

    iquery = 'file dataset=/a/b/c | unique | grep file.name'
    query = DASQuery(iquery)
    filters = query.filters
    filters.sort()
    self.assertEqual(['file.name'], filters)
def test_decode(self):
    "test query decoding"
    sq1 = {'fields': None,
           'spec': [{'key': 'a.b.c', 'value': '"ghi"'},
                    {'key': 'd.e.f', 'value': '"jkl"'}]}
    sq2 = {'fields': None,
           'spec': [{'key': 'a.b.c', 'value': '"ghi"'},
                    {'key': 'd.e.f', 'value': '"jkl"'}]}
    q1 = DASQuery(sq1).mongo_query
    q2 = decode_mongo_query(sq2)
    self.assertEqual(
        json.JSONEncoder(sort_keys=True).encode(q1),
        json.JSONEncoder(sort_keys=True).encode(q2))
def test_loose_query(self):
    "Test loose property of DAS query"
    iquery = "file dataset=/a/b/c"
    mquery = {'fields': ['file'], 'spec': {'dataset.name': '/a/b/c*'}}
    result = DASQuery(iquery).loose_query
    self.assertEqual(mquery, result)
def keylearning_js(dascore, query, kfile, verbose=False):
    "Create keylearning js file for a given query"
    print("Create: %s" % kfile)
    with open(kfile, 'a') as stream:
        dasquery = DASQuery(query)
        result = [r for r in dascore.result(dasquery, 0, 1)][0]  # get one record only
        if verbose:
            print(dasquery)
            print(result)
        mongo_q = dasquery.mongo_query
        skip = ['das_id', 'cache_id', 'qhash', 'error', 'reason', 'das']
        keys = [k for k in mongo_q['fields'] if k not in skip]
        keys += [k.split('.')[0] for k in mongo_q['spec'].keys()]
        keys = list(set(keys))
        system = result['das']['system'][0]
        urn = result['das']['api'][0]
        members = process_document(result)
        jsrow = json.dumps(dict(keys=keys, members=members,
                                system=system, urn=urn))
        if verbose:
            print(jsrow)
        stream.write(jsrow)
        stream.write('\n')
        for member in members:
            stems = member.split('.')
            jsrow = json.dumps(dict(member=member, stems=stems))
            stream.write(jsrow)
            stream.write('\n')
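# Hypothetical sketch: reading back a keylearning js file produced by the
# function above. The file name is an assumption; each line of the file is a
# standalone JSON document, either a summary record or a member/stems record.
import json

with open('keylearning.js') as stream:       # assumed file name
    for line in stream:
        rec = json.loads(line)
        if 'members' in rec:
            # summary record: das keys, record members, data-service and API
            print(rec['system'], rec['urn'], rec['keys'])
        else:
            # member record: attribute name and its stems
            print(rec['member'], rec['stems'])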
def test_qhash(self):
    "Test qhash property"
    sq1 = {'fields': None,
           'spec': [{'key': 'a.b.c', 'value': '"ghi"'},
                    {'key': 'd.e.f', 'value': '"jkl"'}]}
    sq2 = {'fields': None,
           'spec': [{'key': 'a.b.c', 'value': '"ghi"'},
                    {'key': 'd.e.f', 'value': '"jkl"'}]}
    qh1 = DASQuery(sq1).qhash
    qh2 = genkey(sq2)
    self.assertEqual(qh1, qh2)
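# A minimal sketch of how a query hash such as qhash/genkey could be derived:
# hash a canonical (sorted-keys) JSON encoding of the query dictionary. The
# actual genkey implementation in DAS may differ; this only illustrates the idea.
import hashlib
import json

def toy_genkey(query):
    "Return a deterministic hash for a query dictionary (illustrative only)"
    payload = json.dumps(query, sort_keys=True)
    return hashlib.md5(payload.encode('utf-8')).hexdigest()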
def run(out, host, query, idx, limit, debug, thr, ckey, cert):
    """
    Worker function which performs query look-up in DAS and prints results
    to stdout. It should be spawned as a separate process to test the DAS server.
    """
    time0 = time.time()
    data = get_data(host, query, idx, limit, debug, thr, ckey, cert)
    if isinstance(data, dict):
        jsondict = data
    else:
        jsondict = json.loads(data)
    status = jsondict.get('status', None)
    reason = jsondict.get('reason', None)
    nres = jsondict.get('nresults', None)
    tstm = jsondict.get('timestamp', 0)
    data = jsondict.get('data')
    if data and isinstance(data, list) and len(data):
        qhash = data[0].get('qhash')
    else:
        qhash = DASQuery(query + ' instance=%s' % DBS_GLOBAL).qhash
    msg = 'status: %s client: %s server: %s nresults: %s query: %s qhash: %s' \
        % (status, etime(time0), etime(tstm), nres, query, qhash)
    if nres == 0:
        print(jsondict)
    if reason:
        msg += ' reason: %s' % reason
    out.put((nres, status, qhash))
    print(msg)
    if debug:
        if nres > 0:
            if len(data):
                print(data[0])
        else:
            print("### NO DATA:", jsondict)
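# Hypothetical driver for the worker above, assuming run() lives in this
# module. The host, queries, thr value, and credential paths are placeholders,
# not values from the source.
from multiprocessing import Process, Queue

def stress_test(queries, host='https://cmsweb.cern.ch', ckey=None, cert=None):
    "Spawn one process per query and collect (nresults, status, qhash) tuples"
    out = Queue()
    procs = [Process(target=run,
                     args=(out, host, q, 0, 10, 0, 300, ckey, cert))
             for q in queries]
    for proc in procs:
        proc.start()
    for proc in procs:
        proc.join()
    return [out.get() for _ in procs]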
def _do_query_rewrite(self, q, fields_avail, pk):
    """
    generates a nested query that:
    * requests entity by PK
    * projects the missing fields
    """
    q_filters = q.filters
    # find all lookup (primary keys) for given das entity
    # It is safe to combine the queries
    filters_first = [f for f in q_filters
                     if get_filter_name(f) in fields_avail]
    filters_nested = [f for f in q_filters
                      if get_filter_name(f) not in fields_avail]

    q1_mongo = q.mongo_query.copy()
    q1_mongo['filters'] = {'grep': list(set(filters_first) | set([pk, ]))}
    q1 = DASQuery(q1_mongo)

    q2_mongo = q.mongo_query.copy()
    # make DASQuery pass dataset wildcard check
    pk_to_replace = '/a/b/c' if pk == 'dataset.name' else '<PK>'
    q2_mongo['spec'] = {pk: pk_to_replace}
    q2_mongo['filters'] = {'grep': list(filters_nested)}
    # if the queries are the same, the rewrite is unsuccessful
    if set(q1_mongo['spec'].keys()) == set(q2_mongo['spec'].keys()):
        return
    q2 = DASQuery(q2_mongo)

    msg = self.render_template(
        'cms_query_rewrite',
        q1_str=self.convert2dasql(q1),
        q2_str=self.convert2dasql(q2),
        pk=pk,
        # user replaces this with PK from 1st query
        pk_to_replace=pk_to_replace,
        cli_docs=self.CLI_LINK)
    return msg
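# Illustrative toy version of the filter-name extraction this rewrite relies
# on; the real get_filter_name in DAS may handle more operators and quoting.
import re

def toy_get_filter_name(flt):
    "Return the field part of a filter such as 'file.size>100' -> 'file.size'"
    return re.split(r'[=<>!]', flt, 1)[0].strip()

assert toy_get_filter_name('file.size>100') == 'file.size'
assert toy_get_filter_name('file.name') == 'file.name'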
def test_query_str_repr(self):
    "Test DAS query str/repr method"
    mquery = {'fields': ['file'], 'spec': {u'dataset.name': '/a/b/c'}}
    iquery = 'file dataset=/a/b/c'
    dasquery = DASQuery(iquery)
    msg = "<query='''%s''' instance=%s qhash=%s services=%s>" \
        % (iquery, dasquery.instance, dasquery.qhash, dasquery.services)
    dasstr = '%s' % dasquery
    self.assertEqual(msg, dasstr)
def test_pattern_query(self):
    "Test pattern property of DAS query"
    val = '/a/b/c'
    query = 'file dataset=%s' % val
    pat = re.compile('^%s.*' % val)
    mquery = {'fields': ['file'], 'spec': {'dataset.name': pat}}
    pattern_query = DASQuery(query).pattern_query
    result = pattern_query['spec']['dataset.name']
    self.assertEqual(pat.pattern, result.pattern)
def test_encode(self):
    "test query encoding"
    q1 = {'fields': None, 'spec': {'a.b.c': 'ghi', 'd.e.f': 'jkl'}}
    q2 = {'fields': None, 'spec': {'a.b.c': 'ghi', 'd.e.f': 'jkl'}}
    sq1 = DASQuery(q1).storage_query
    sq2 = encode_mongo_query(q2)
    self.assertEqual(
        json.JSONEncoder(sort_keys=True).encode(sq1),
        json.JSONEncoder(sort_keys=True).encode(sq2))
def get_fields_in_query_result(self, dasquery):
    """
    returns a list of fields in the results of dasquery
    (must be in cache)
    """
    mongo_query = dasquery.mongo_query.copy()
    mongo_query['filters'] = {}
    dasquery = DASQuery(mongo_query)

    fieldlist = []
    if dasquery.mongo_query:
        # loop over few records to get unique set of attributes
        for row in self.dasmgr.get_from_cache(dasquery, idx=0, limit=10):
            fieldlist += self.cms_rep.get_result_fieldlist(row)
    return list(set(fieldlist))
def test_bare_query(self):
    "Test bare query method"
    q1 = {'fields': None,
          'spec': {'a.b.c': 'ghi', 'd.e.f': 'jkl'},
          'filters': ['foo']}
    q2 = {'fields': None, 'spec': {'a.b.c': 'ghi', 'd.e.f': 'jkl'}}
    bq1 = DASQuery(q1).to_bare_query()
    self.assertEqual(
        json.JSONEncoder(sort_keys=True).encode(bq1),
        json.JSONEncoder(sort_keys=True).encode(q2))
def testAggregators(self):
    """test DASCore aggregators via zip service"""
    # test DAS workflow
    query = "file dataset=/ZMM/Summer11-DESIGN42_V11_428_SLHC1-v1/GEN-SIM | grep file.size | sum(file.size)"
    dquery = DASQuery(query)
    result = self.das.call(dquery)
    result = self.das.get_from_cache(dquery)
    result = [r for r in result][0]
    if 'das' in result:
        del result['das']  # strip off DAS info
    expect = {"function": "sum", "result": {"value": 5658838455},
              "key": "file.size", "_id": 0}
    # the result may have value == 'N/A' when the test is run w/o certificates
    # (travis); in this case we just skip it
    if result['result']['value'] != 'N/A':
        self.assertEqual(expect, result)
def test_query_with_instance(self):
    "Test DAS query properties"
    inst = 'global'

    iquery = 'file dataset=/a/b/c instance=%s' % inst
    query = DASQuery(iquery)
    self.assertEqual(inst, query.instance)

    iquery = 'file dataset=/a/b/c'
    query = DASQuery(iquery, instance=inst)
    self.assertEqual(inst, query.instance)

    iquery = {'fields': ['file'], 'spec': {'dataset': '/a/b/c'}}
    query = DASQuery(iquery, instance=inst)
    self.assertEqual(inst, query.instance)

    iquery = {'fields': ['file'], 'spec': {'dataset': '/a/b/c'},
              'instance': inst}
    query = DASQuery(iquery)
    self.assertEqual(inst, query.instance)

    iquery = {'fields': ['file'],
              'spec': [{'key': 'dataset', 'value': '"/a/b/c"'}],
              'instance': inst}
    query = DASQuery(iquery, instance=inst)
    self.assertEqual(inst, query.instance)

    iquery = {'fields': ['file'],
              'spec': [{'key': 'dataset', 'value': '"/a/b/c"'}],
              'instance': inst}
    query = DASQuery(iquery)
    self.assertEqual(inst, query.instance)
def test_aggregator_duplicates(self):
    """Test aggregator function"""
    dasquery = DASQuery(dict(fields=None, spec={'dataset': '/a/b/c'}))
    qhash = dasquery.qhash
    das = {'expire': 10, 'primary_key': 'run.a', 'record': 1,
           'api': ['api'], 'system': ['foo'], 'services': [],
           'condition_keys': ['run'], 'instance': None}
    rows = []
    row = {'run': {'a': 1, 'b': 1}, 'das': das, '_id': 1, 'das_id': [1]}
    rows.append(row)
    row = {'run': {'a': 1, 'b': 1}, 'das': das, '_id': 2, 'das_id': [2]}
    rows.append(row)
    res = (r for r in rows)
    result = [r for r in aggregator(dasquery, res, das['expire'])]
    for r in result:
        del r['das']['ts']  # we don't need record timestamp
    expect = [{'run': [{'a': 1, 'b': 1}, {'a': 1, 'b': 1}],
               'das': das, 'qhash': qhash,
               'das_id': [1, 2], 'cache_id': [1, 2]}]
    self.assertEqual(result, expect)
def generate_dasquery(self, uinput, inst, html_mode=True, qcache=0):
    """
    Check provided input as valid DAS input query.
    Returns status and content (either error message or valid DASQuery)
    :param uinput: user's input
    :param inst: DBS instance
    :param html_mode: whether errors shall be output in html
    """
    def error_msg(msg, show_kws=False, tmpl='das_ambiguous', **kwargs):
        """
        Helper function which renders an error template, default is
        das_ambiguous, but can be overridden via tmpl param.
        Template has two versions: html and text for CLI.
        The template is passed with msg, base, guide, and **kwargs.
        """
        # TODO: this shall be done by inheriting a parent template
        # TODO: no header/footer?
        guide = self.templatepage('dbsql_vs_dasql',
                                  operators=', '.join(das_operators()))
        # render keyword search loader, if needed
        kws = ''
        if show_kws:
            kws = self.templatepage('kwdsearch_via_ajax',
                                    uinput=uinput,
                                    jsonize=jsonize,
                                    url_extend_params_as_dict=url_extend_params_as_dict,
                                    inst=inst or self.dbs_global,
                                    kws_host=self._get_kws_host())
        # render the appropriate template (html vs text mode)
        page = self.templatepage(tmpl + ('_txt' if not html_mode else ''),
                                 msg=msg, base=self.base, guide=guide,
                                 kws_enabled=show_kws, kws=kws, **kwargs)
        return page
    if not uinput:
        return 1, error_msg('No input query')
    # Generate a DASQuery object, if it fails we catch the exception and
    # wrap it for upper layer (web interface)
    try:
        dasquery = DASQuery(uinput, instance=inst, qcache=qcache)
    except WildcardMultipleMatchesException as err:
        # TODO: hints could be shown here also, but it makes no sense, as
        # they are shown only when no matches are found
        if isinstance(err.options.values, list) and err.options.values:
            return 1, error_msg(str(err), tmpl='das_wildcard_err',
                                suggest=err.options.values,
                                url_extend_params=url_extend_params)
        return 1, error_msg(str(err), tmpl='das_wildcard_err',
                            url_extend_params=url_extend_params)
    except WildcardMatchingException as err:
        kwds = {'input': uinput, 'instance': inst}
        hints = self.hint_datasets(kwds)
        page = error_msg(str(err))
        for hint in hints:
            page += self.templatepage('hint',
                                      url_extend_params=url_extend_params,
                                      hint=hint, base=self.base,
                                      dbs=self.dbs_global)
        return 1, page
    except Exception as err:
        # show multiple dataset matches for 1 keyword queries
        if hasattr(response, 'dataset_matches_msg'):
            return 1, error_msg(response.dataset_matches_msg,
                                show_kws=self.is_kws_enabled())
        # for non Wildcard parsing errors, show the Keyword Search
        return 1, error_msg(str(err), show_kws=self.is_kws_enabled())
    if dasquery.error:
        return 1, error_msg(dasquery.error)
    # DAS query validation
    if isinstance(uinput, dict):  # DASQuery w/ {'spec':{'_id:id}}
        pass
    elif uinput.find('queries') != -1:
        pass
    elif uinput.find('records') != -1:
        pass
    else:  # normal user DAS query
        try:
            service_map = dasquery.service_apis_map()
        except Exception as exc:
            msg = 'Fail to obtain service API map for this DASQuery'
            print(msg)
            print_exc(exc)
            return 1, error_msg(msg)
        if not service_map:
            return 1, error_msg('Unable to resolve the query over the '
                                'available services: %s' % dasquery)
    return 0, dasquery
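# Hypothetical caller sketch: how the web layer might use generate_dasquery.
# The method name, uinput string, and self.dasmgr attribute are assumptions
# for illustration, not taken from the source.
def handle_request(self, uinput, inst):
    "Validate user input and either render an error page or run the query"
    status, content = self.generate_dasquery(uinput, inst, html_mode=True)
    if status:           # non-zero status: content is an error page
        return content
    dasquery = content   # zero status: content is a valid DASQuery
    return self.dasmgr.call(dasquery)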
def call(self, query, **kwds):
    """
    Top level DAS API which executes a given query using underlying
    data-services. It follows these steps:

        - parse input query
        - identify data-services based on selection keys
          and where clause conditions
        - construct DAS workflow and execute data-service
          API calls. At this step individual data-services
          store results into DAS cache.

    Return status 0/1 depending on success of the calls, can be
    used by workers on cache server.

    kwds is provided for compatibility with web layer, e.g. it
    may invoke this method with additional pid parameter.
    """
    def update_das_query(dasquery, status, reason=None):
        "Update DAS query record with given status and reason"
        self.rawcache.update_query_record(dasquery, status, reason=reason)
        self.rawcache.add_to_record(
            dasquery, {'das.timer': get_das_timer()}, system='das')

    self.logger.info('input query=%s' % query)
    das_timer('DASCore::call', self.verbose)
    if isinstance(query, object) and hasattr(query, '__class__')\
            and query.__class__.__name__ == 'DASQuery':
        dasquery = query
    else:
        dasquery = DASQuery(query)
    for col in ['merge', 'cache']:
        self.rawcache.remove_expired(dasquery, col)
    query = dasquery.mongo_query
    spec = query.get('spec')
    fields = query.get('fields')
    if fields == ['records']:
        msg = 'look-up all records in cache'
        self.logger.info(msg)
        return 'in cache'
    if spec == dict(records='*'):
        self.logger.info("look-up everything in cache")
        return 'in cache'
    for record in self.rawcache.find_specs(dasquery):
        status = record['das']['status']
        msg = 'found query %s in cache, status=%s\n' \
            % (record['query'], status)
        self.logger.info(msg)
        print(dastimestamp('DAS INFO'), msg)
        return status
    self.logger.info(dasquery)
    das_timer('das_record', self.verbose)
    services = self.insert_query_records(dasquery)
    if not services:
        msg = 'unable to locate data-services to fulfill this request'
        msg += ', will iterate over all registered services'
        print(dastimestamp('DAS WARNING '), dasquery, msg)
        services = dasquery.services if dasquery.services else self.systems
    try:
        if self.multitask:
            jobs = []
            for srv in sorted(services):
                jobs.append(self.taskmgr.spawn(self.worker, srv, dasquery))
            self.taskmgr.joinall(jobs)
        else:
            for srv in services:
                self.worker(srv, dasquery)
    except Exception as exc:
        print_exc(exc)
        return 'fail'
    self.logger.info('\n##### merging ######\n')
    update_das_query(dasquery, 'merging')
    das_timer('merge', self.verbose)
    # check that all query record statuses are ok, i.e. we did insert records;
    # this status is set by self.rawcache.update_cache
    for idx in range(self.collect_wait_time):
        records = self.rawcache.find_query_record(dasquery)
        statuses = []
        for row in records:
            system = row['das']['system']
            status = row['das']['status']
            self.logger.info("### query record status %s %s %s"
                             % (dasquery.qhash, system, status))
            statuses.append(status)
        all_statuses = sorted(list(set(statuses)))
        # at this point all services should report 'ok' while the das record
        # status is still 'merging'
        if len(all_statuses) == 2 and all_statuses == ['merging', 'ok']:
            break
        time.sleep(1)
    # now we can merge records
    status = self.rawcache.merge_records(dasquery)
    das_timer('merge', self.verbose)
    # check if we have service records and properly set up status
    self.logger.info('\n##### check services ######\n')
    das_services = self.rawcache.check_services(dasquery)
    reason = ''
    status = 'ok'
    if not das_services:
        if 'records' in dasquery.query:
            status = 'ok'  # keep status ok for 'records' queries
        else:
            reason = 'no data records found in DAS cache'
            status = 'fail'
            print(dastimestamp('DAS ERROR '), dasquery, reason)
    update_das_query(dasquery, status, reason)
    das_timer('DASCore::call', self.verbose)
    return status
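# Hypothetical end-to-end sketch of the workflow this method drives: run the
# query, then page the merged results out of the cache. The DASCore/DASQuery
# construction and the query string are assumptions for illustration.
dascore = DASCore()
dasquery = DASQuery('dataset dataset=/ZMM*/*/*')
if dascore.call(dasquery) == 'ok':
    nres = dascore.nresults(dasquery)
    for row in dascore.get_from_cache(dasquery, idx=0, limit=min(nres, 10)):
        print(row)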
def generate_dasquery(self, uinput, inst, html_mode=True):
    """
    Check provided input as valid DAS input query.
    Returns status and content (either error message or valid DASQuery)
    :param uinput: user's input
    :param inst: DBS instance
    :param html_mode: whether errors shall be output in html
    """
    def error_msg(msg, show_kws=False, tmpl="das_ambiguous", **kwargs):
        """
        Helper function which renders an error template, default is
        das_ambiguous, but can be overridden via tmpl param.
        Template has two versions: html and text for CLI.
        The template is passed with msg, base, guide, and **kwargs.
        """
        guide = self.templatepage("dbsql_vs_dasql",
                                  operators=", ".join(das_operators()))
        # render keyword search loader, if needed
        kws = ""
        if show_kws:
            kws = self.templatepage("kwdsearch_via_ajax",
                                    uinput=uinput,
                                    inst=inst or self.dbs_global,
                                    kws_host=self._get_kws_host())
        # render the appropriate template (html vs text mode)
        page = self.templatepage(tmpl + ("_txt" if not html_mode else ""),
                                 msg=msg, base=self.base, guide=guide,
                                 kws_enabled=show_kws, kws=kws, **kwargs)
        return page
    if not uinput:
        return 1, error_msg("No input query")
    # Generate a DASQuery object, if it fails we catch the exception and
    # wrap it for upper layer (web interface)
    try:
        dasquery = DASQuery(uinput, instance=inst)
    except WildcardMultipleMatchesException as err:
        das_parser_error(uinput, str(err).replace("\n", ""))
        return 1, error_msg(str(err), tmpl="das_wildcard_err",
                            suggest=err.options.values)
    except WildcardMatchingException as err:
        das_parser_error(uinput, str(type(err)) + " " + str(err))
        return 1, error_msg(str(err))
    except Exception as err:
        das_parser_error(uinput, str(type(err)) + " " + str(err))
        # show multiple dataset matches for 1 keyword queries
        if hasattr(response, "dataset_matches_msg"):
            return 1, error_msg(response.dataset_matches_msg,
                                show_kws=self.is_kws_enabled())
        # for non Wildcard parsing errors, show the Keyword Search
        return 1, error_msg(str(err), show_kws=self.is_kws_enabled())
    # DAS query validation
    if isinstance(uinput, dict):  # DASQuery w/ {'spec':{'_id:id}}
        pass
    elif uinput.find("queries") != -1:
        pass
    elif uinput.find("records") != -1:
        pass
    else:  # normal user DAS query
        try:
            service_map = dasquery.service_apis_map()
        except Exception as exc:
            msg = "Fail to obtain service API map for this DASQuery"
            print msg
            print_exc(exc)
            return 1, error_msg(msg)
        if not service_map:
            return 1, error_msg("Unable to resolve the query over the "
                                "available services: %s" % dasquery)
    return 0, dasquery
def main():
    "Main function"
    optmgr = DASOptionParser()
    opts = optmgr.parser.parse_args()

    t0 = time.time()
    query = opts.query
    if 'instance' not in query:
        query = ' instance=prod/global ' + query
    debug = opts.verbose
    dascore = DASCore(debug=debug, nores=opts.noresults)
    if opts.hash:
        dasquery = DASQuery(query)
        mongo_query = dasquery.mongo_query
        service_map = dasquery.service_apis_map()
        str_query = dasquery.storage_query
        print("---------------")
        print("DAS-QL query :", query)
        print("DAS query :", dasquery)
        print("Mongo query :", mongo_query)
        print("Storage query :", str_query)
        print("Services :\n")
        for srv, val in service_map.items():
            print("%s : %s\n" % (srv, ', '.join(val)))
        sys.exit(0)
    sdict = dascore.keys()
    if opts.services:
        msg = "DAS services:"
        print(msg)
        print("-" * len(msg))
        keys = list(sdict.keys())
        keys.sort()
        for key in keys:
            print(key)
    elif opts.service:
        msg = "DAS service %s:" % opts.service
        print(msg)
        print("-" * len(msg))
        keys = sdict[opts.service]
        keys.sort()
        for key in keys:
            print(key)
    elif opts.jsfile:
        kws_js(dascore, query, opts.idx, opts.limit, opts.jsfile, debug)
        sys.exit(0)
    elif opts.kfile:
        keylearning_js(dascore, query, opts.kfile, debug)
        sys.exit(0)
    elif query:
        idx = opts.idx
        limit = opts.limit
        output = opts.nooutput
        plain = opts.plain
        qcache = opts.qcache
        if opts.profile:
            import cProfile  # python profiler
            import pstats    # profiler statistics
            cmd = 'run(dascore,query,idx,limit,output,plain)'
            cProfile.runctx(cmd, globals(), locals(), 'profile.dat')
            info = pstats.Stats('profile.dat')
            info.sort_stats('cumulative')
            info.print_stats()
        else:
            run(dascore, query, idx, limit, output, plain)
    elif opts.dasconfig:
        print(pformat(dascore.dasconfig))
    else:
        print()
        print("DAS CLI interface, no actions found,")
        print("please use --help for more options.")
    timestamp = time.strftime("%a, %d %b %Y %H:%M:%S GMT", time.gmtime())
    timer = get_das_timer()
    print("\nDAS execution time:\n")
    if debug:
        timelist = []
        for _, timerdict in timer.items():
            counter = timerdict['counter']
            tag = timerdict['tag']
            exetime = timerdict['time']
            timelist.append((counter, tag, exetime))
        timelist.sort()
        for _, tag, exetime in timelist:
            print("%s %s sec" % (tag, round(exetime, 2)))
    print("Total %s sec, %s" % (round(time.time() - t0, 2), timestamp))
def test_aggregator(self):
    """Test aggregator function"""
    # 1 row in results
    dasquery = DASQuery(dict(fields=None, spec={'dataset': '/a/b/c'}))
    qhash = dasquery.qhash
    das = {'expire': 10, 'primary_key': 'vk', 'record': 1,
           'api': 'api', 'system': ['foo'], 'services': [],
           'condition_keys': ['run'], 'instance': None}
    row = {'run': 10, 'das': das, '_id': 1, 'das_id': 1}
    rows = (row for i in range(0, 1))
    result = [r for r in aggregator(dasquery, rows, das['expire'])]
    del result[0]['das']['ts']  # we don't need record timestamp
    expect = [{'run': 10, 'das': das, 'cache_id': [1], 'das_id': [1],
               'qhash': qhash}]
    self.assertEqual(result, expect)

    # 2 rows with different values for common key
    rows = []
    row = {'run': 1, 'das': das, '_id': 1, 'das_id': 1}
    rows.append(row)
    row = {'run': 2, 'das': das, '_id': 1, 'das_id': 1}
    rows.append(row)
    res = (r for r in rows)
    result = [r for r in aggregator(dasquery, res, das['expire'])]
    for r in result:
        del r['das']['ts']  # we don't need record timestamp
    expect = [{'run': 1, 'das': das, 'das_id': [1], 'cache_id': [1],
               'qhash': qhash},
              {'run': 2, 'das': das, 'das_id': [1], 'cache_id': [1],
               'qhash': qhash}]
    self.assertEqual(result, expect)

    # 2 rows with common value for common key
    das = {'expire': 10, 'primary_key': 'run.a', 'record': 1,
           'api': ['api'], 'system': ['foo'], 'services': [],
           'condition_keys': ['run'], 'instance': None}
    rows = []
    row = {'run': {'a': 1, 'b': 1}, 'das': das, '_id': 1, 'das_id': [1]}
    rows.append(row)
    row = {'run': {'a': 1, 'b': 2}, 'das': das, '_id': 1, 'das_id': [1]}
    rows.append(row)
    res = (r for r in rows)
    result = [r for r in aggregator(dasquery, res, das['expire'])]
    for r in result:
        del r['das']['ts']  # we don't need record timestamp
    expect = [{'run': [{'a': 1, 'b': 1}, {'a': 1, 'b': 2}],
               'das': das, 'das_id': [1], 'cache_id': [1],
               'qhash': qhash}]
    self.assertEqual(result, expect)
def get_data(self, kwargs):
    """
    Invoke DAS workflow and get data from the cache.
    """
    head = dict(timestamp=time.time())
    head['args'] = kwargs
    uinput = kwargs.get('input', '')
    inst = kwargs.get('instance', self.dbs_global)
    idx = getarg(kwargs, 'idx', 0)
    limit = getarg(kwargs, 'limit', 0)  # do not impose limit
    coll = kwargs.get('collection', 'merge')
    status = kwargs.get('status')
    error = kwargs.get('error')
    reason = kwargs.get('reason')
    dasquery = kwargs.get('dasquery', None)
    time0 = time.time()
    if dasquery:
        dasquery = DASQuery(dasquery, instance=inst)
        if dasquery.error:
            return self.page(form + dasquery.error, ctime=time.time()-time0)
    else:
        check, content = \
            self.generate_dasquery(uinput, inst, html_mode=False)
        if check:
            head.update({'status': 'fail', 'reason': content,
                         'ctime': time.time()-time0, 'input': uinput})
            data = []
            return head, data
        dasquery = content  # returned content is valid DAS query
    try:
        nres = self.dasmgr.nresults(dasquery, coll)
        data = self.dasmgr.get_from_cache(dasquery, idx, limit)
        # check that we got what we expected
        data = [r for r in data]
        if nres and not len(data):
            for retry in range(1, 3, 5):
                msg = 'retry in %s sec' % retry
                dasprint(dastimestamp('DAS WARNING '), msg, dasquery)
                time.sleep(retry)  # retry one more time
                data = self.dasmgr.get_from_cache(dasquery, idx, limit)
                data = [r for r in data]
                if len(data):
                    break
        if nres and not len(data):
            msg = 'fail to get all data for %s, nres=%s, len(data)=%s' \
                % (dasquery, nres, len(data))
            dasprint(dastimestamp('DAS WARNING '), msg)
            status = 'fail'
            reason = 'Fail to retrieve data from DAS cache, please retry'
        if dasquery.aggregators:
            # aggregators split DAS record into sub-system and then
            # apply aggregator functions, therefore we need to correctly
            # account for nresults. Resolve generator into list and take
            # its length as nresults value.
            data = [r for r in data]
            nres = len(data)
        if error:  # DAS record contains an error
            status = 'error'
        head.update({'status': status, 'nresults': nres,
                     'ctime': time.time()-time0, 'dasquery': dasquery})
    except Exception as exc:
        status = 'fail'
        reason = str(exc)
        print_exc(exc)
        head.update({'status': status, 'ctime': time.time()-time0,
                     'dasquery': dasquery})
        data = []
    head.update({'incache': self.dasmgr.incache(dasquery, coll='cache'),
                 'apilist': self.dasmgr.apilist(dasquery)})
    if reason:
        head.update({'reason': reason})
    if status != 'ok':
        head.update(self.info())
    # check if query had dataset input and returned no results,
    # then run hint functions to find dataset in other DBS instances
    mquery = dasquery.mongo_query
    empty = False
    for item in data:
        if 'dataset.name' in mquery['spec'] and 'dataset' in mquery['fields'] \
                and 'result' not in item:
            if not item['dataset']:
                empty = True
                break
    if empty:  # if no results found add dataset from other DBS instances
        hints = self.hint_datasets(kwargs)
        for item in data:
            item.update({'hints': hints})
    return head, data
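# Hypothetical caller sketch for get_data: the kwargs keys mirror those read
# above; 'web' stands for the DAS web-service instance and all values are
# placeholders, not taken from the source.
kwargs = {
    'input': 'dataset dataset=/ZMM*/*/*',  # user's DAS-QL input
    'instance': 'prod/global',             # DBS instance
    'idx': 0,                              # pagination offset
    'limit': 10,                           # number of records to return
    'collection': 'merge',                 # read from the merged cache collection
}
head, data = web.get_data(kwargs)
if head.get('status') != 'ok':
    print(head.get('reason'))
for row in data:
    print(row)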
def main():
    "Main function"
    optmgr = DASOptionParser()
    opts, _ = optmgr.getOpt()

    t0 = time.time()
    query = opts.query
    if 'instance' not in query:
        query += ' instance=cms_dbs_prod_global'
    debug = opts.verbose
    dascore = DASCore(debug=debug, nores=opts.noresults)
    if opts.hash:
        dasquery = DASQuery(query)
        mongo_query = dasquery.mongo_query
        service_map = dasquery.service_apis_map()
        str_query = dasquery.storage_query
        print "---------------"
        print "DAS-QL query :", query
        print "DAS query :", dasquery
        print "Mongo query :", mongo_query
        print "Storage query :", str_query
        print "Services :\n"
        for srv, val in service_map.items():
            print "%s : %s\n" % (srv, ', '.join(val))
        sys.exit(0)
    sdict = dascore.keys()
    if opts.services:
        msg = "DAS services:"
        print msg
        print "-" * len(msg)
        keys = sdict.keys()
        keys.sort()
        for key in keys:
            print key
    elif opts.service:
        msg = "DAS service %s:" % opts.service
        print msg
        print "-" * len(msg)
        keys = sdict[opts.service]
        keys.sort()
        for key in keys:
            print key
    elif query:
        idx = opts.idx
        limit = opts.limit
        output = opts.nooutput
        plain = opts.plain
        if opts.profile:
            import cProfile  # python profiler
            import pstats    # profiler statistics
            cmd = 'run(dascore,query,idx,limit,output,plain)'
            cProfile.runctx(cmd, globals(), locals(), 'profile.dat')
            info = pstats.Stats('profile.dat')
            info.sort_stats('cumulative')
            info.print_stats()
        else:
            run(dascore, query, idx, limit, output, plain)
    elif opts.dasconfig:
        print pformat(dascore.dasconfig)
    else:
        print
        print "DAS CLI interface, no actions found,"
        print "please use --help for more options."
    timestamp = time.strftime("%a, %d %b %Y %H:%M:%S GMT", time.gmtime())
    timer = get_das_timer()
    print "\nDAS execution time:\n"
    if debug:
        timelist = []
        for _, timerdict in timer.items():
            counter = timerdict['counter']
            tag = timerdict['tag']
            exetime = timerdict['time']
            timelist.append((counter, tag, exetime))
        timelist.sort()
        for _, tag, exetime in timelist:
            print "%s %s sec" % (tag, round(exetime, 2))
    print "Total %s sec, %s" % (round(time.time()-t0, 2), timestamp)
def call(self, query, add_to_analytics=True, **kwds):
    """
    Top level DAS API which executes a given query using underlying
    data-services. It follows these steps:

        - parse input query
        - identify data-services based on selection keys
          and where clause conditions
        - construct DAS workflow and execute data-service
          API calls. At this step individual data-services
          store results into DAS cache.

    Return status 0/1 depending on success of the calls, can be
    used by workers on cache server.

    kwds is provided for compatibility with web layer, e.g. it
    may invoke this method with additional pid parameter.
    """
    self.logger.info('input query=%s' % query)
    das_timer('DASCore::call', self.verbose)
    services = []
    if isinstance(query, object) and hasattr(query, '__class__')\
            and query.__class__.__name__ == 'DASQuery':
        dasquery = query
    else:
        dasquery = DASQuery(query, mongoparser=self.mongoparser)
    if add_to_analytics:
        dasquery.add_to_analytics()
    query = dasquery.mongo_query
    if dasquery.mongo_query.has_key('system'):
        system = query['system']
        if isinstance(system, str) or isinstance(system, unicode):
            services = [system]
        elif isinstance(system, list):
            services = system
        else:
            msg = 'Unsupported system=%s type=%s in DAS query' \
                % (system, type(system))
            raise Exception(msg)
    spec = query.get('spec')
    fields = query.get('fields')
    if fields == ['records']:
        msg = 'look-up all records in cache'
        self.logger.info(msg)
        return 'in cache'
    if spec == dict(records='*'):
        self.logger.info("look-up everything in cache")
        return 'in cache'
    for record in self.rawcache.find_specs(dasquery):
        status = record['das']['status']
        msg = 'found query %s in cache, status=%s\n' \
            % (record['query'], status)
        self.logger.info(msg)
        return status
    similar_dasquery = self.rawcache.similar_queries(dasquery)
    if similar_dasquery:
        for record in self.rawcache.find_specs(similar_dasquery):
            if record:
                try:
                    status = record['das']['status']
                except:
                    status = 'N/A'
                    msg = 'Fail to look-up das.status, record=%s' % record
                    self.logger.info(msg)
                msg = 'found SIMILAR query in cache,'
                msg += 'query=%s, status=%s\n' % (record['query'], status)
                self.logger.info(msg)
                return status
    self.logger.info(dasquery)
    params = dasquery.params()
    if not services:
        services = params['services']
    self.logger.info('services = %s' % services)
    das_timer('das_record', self.verbose)
    # initial expire tstamp 1 day (long enough to be overwritten by data-srv)
    expire = expire_timestamp(time.time() + 1*24*60*60)
    header = dasheader("das", dasquery, expire)
    header['lookup_keys'] = []
    self.rawcache.insert_query_record(dasquery, header)
    das_timer('das_record', self.verbose)
    try:
        if self.multitask:
            jobs = []
            for srv in services:
                jobs.append(self.taskmgr.spawn(self.worker, srv, dasquery))
            self.taskmgr.joinall(jobs)
        else:
            for srv in services:
                self.worker(srv, dasquery)
    except Exception as exc:
        print_exc(exc)
        return 'fail'
    self.logger.info('\n##### merging ######\n')
    self.rawcache.update_query_record(dasquery, 'merging')
    das_timer('merge', self.verbose)
    self.rawcache.merge_records(dasquery)
    das_timer('merge', self.verbose)
    self.rawcache.update_query_record(dasquery, 'ok')
    self.rawcache.add_to_record(
        dasquery, {'das.timer': get_das_timer()}, system='das')
    das_timer('DASCore::call', self.verbose)
    return 'ok'
def call(self, query, add_to_analytics=True, **kwds):
    """
    Top level DAS API which executes a given query using underlying
    data-services. It follows these steps:

        - parse input query
        - identify data-services based on selection keys
          and where clause conditions
        - construct DAS workflow and execute data-service
          API calls. At this step individual data-services
          store results into DAS cache.

    Return status 0/1 depending on success of the calls, can be
    used by workers on cache server.

    kwds is provided for compatibility with web layer, e.g. it
    may invoke this method with additional pid parameter.
    """
    def update_das_query(dasquery, status, reason=None):
        "Update DAS query record with given status and reason"
        self.rawcache.update_query_record(dasquery, status, reason=reason)
        self.rawcache.add_to_record(
            dasquery, {'das.timer': get_das_timer()}, system='das')
        # make sure that das record is updated; we use 7 iterations which
        # sum up to about 1 minute to cover the default syncdelay value of
        # the mongo server (in the future it would be better to find this
        # syncdelay value programmatically, but it seems the pymongo driver
        # does not provide any API for it).
        for idx in xrange(1, 7):
            spec = {'qhash': dasquery.qhash, 'das.system': ['das']}
            res = self.rawcache.col.find_one(spec)
            if res:
                dbstatus = res.get('das', {}).get('status', None)
                if dbstatus == status:
                    break
                msg = 'qhash %s, das.status=%s, status=%s, wait for update' \
                    % (dasquery.qhash, dbstatus, status)
                print dastimestamp('DAS WARNING'), msg
            time.sleep(idx*idx)
            self.rawcache.update_query_record(dasquery, status, reason=reason)

    self.logger.info('input query=%s' % query)
    das_timer('DASCore::call', self.verbose)
    if isinstance(query, object) and hasattr(query, '__class__')\
            and query.__class__.__name__ == 'DASQuery':
        dasquery = query
    else:
        dasquery = DASQuery(query)
    for col in ['merge', 'cache']:
        self.rawcache.remove_expired(dasquery, col)
    if add_to_analytics:
        dasquery.add_to_analytics()
    query = dasquery.mongo_query
    spec = query.get('spec')
    fields = query.get('fields')
    if fields == ['records']:
        msg = 'look-up all records in cache'
        self.logger.info(msg)
        return 'in cache'
    if spec == dict(records='*'):
        self.logger.info("look-up everything in cache")
        return 'in cache'
    for record in self.rawcache.find_specs(dasquery):
        status = record['das']['status']
        msg = 'found query %s in cache, status=%s\n' \
            % (record['query'], status)
        self.logger.info(msg)
        print dastimestamp('DAS INFO'), msg
        return status
    self.logger.info(dasquery)
    das_timer('das_record', self.verbose)
    services = self.insert_query_records(dasquery)
    if not services:
        msg = 'unable to locate data-services to fulfill this request'
        msg += ', will iterate over all registered services'
        print dastimestamp('DAS WARNING '), dasquery, msg
        services = dasquery.services if dasquery.services else self.systems
    try:
        if self.multitask:
            jobs = []
            for srv in sorted(services):
                jobs.append(self.taskmgr.spawn(self.worker, srv, dasquery))
            self.taskmgr.joinall(jobs)
        else:
            for srv in services:
                self.worker(srv, dasquery)
    except Exception as exc:
        print_exc(exc)
        return 'fail'
    self.logger.info('\n##### merging ######\n')
    update_das_query(dasquery, 'merging')
    das_timer('merge', self.verbose)
    self.rawcache.merge_records(dasquery)
    das_timer('merge', self.verbose)
    # check if we have service records and properly set up status
    self.logger.info('\n##### check services ######\n')
    das_services = self.rawcache.check_services(dasquery)
    reason = ''
    status = 'ok'
    if not das_services:
        if 'records' in dasquery.query:
            status = 'ok'  # keep status ok for 'records' queries
        else:
            reason = 'no data records found in DAS cache'
            status = 'fail'
            print dastimestamp('DAS ERROR '), dasquery, reason
    update_das_query(dasquery, status, reason)
    das_timer('DASCore::call', self.verbose)
    return status
def listview(self, head, data):
    """
    Represent data in list view.
    """
    kwargs = head.get('args')
    uinput = kwargs.get('input', '')
    total = head.get('nresults', 0)
    apilist = head.get('apilist')
    dasquery = head.get('dasquery', None)
    if not dasquery:
        inst = head.get('instance', self.dbs_global)
        dasquery = DASQuery(uinput, instance=inst)
    inst = dasquery.instance
    filters = dasquery.filters
    aggrtrs = dasquery.aggregators
    pager = self.pagination(head)
    main = pager
    style = 'white'
    rowkeys = []
    fltpage = self.filter_bar(dasquery)
    page = ''
    old = None
    dup = False
    status = head.get('status', None)
    if status == 'fail':
        reason = head.get('reason', '')
        if reason:
            page += '<br/><span class="box_red">%s</span>' % reason
    for row in data:
        if not row:
            continue
        if not dup and old and identical_data_records(old, row):
            dup = True
        error = row.get('error', None)
        try:
            mongo_id = row['_id']
        except Exception as exc:
            msg = 'Exception: %s\n' % str(exc)
            msg += 'Fail to process row\n%s' % str(row)
            raise Exception(msg)
        page += '<div class="%s"><hr class="line" />' % style
        links = []
        pkey = None
        pval = None
        lkey = None
        if 'das' in row and 'primary_key' in row['das']:
            pkey = row['das']['primary_key']
            if pkey and not rowkeys and not fltpage:
                fltpage = self.fltpage(dasquery)
            try:
                lkey = pkey.split('.')[0]
                if pkey == 'summary':
                    pval = row[pkey]
                else:
                    pval = [i for i in DotDict(row).get_values(pkey)]
                    if isinstance(pval, list):
                        if pval and not isinstance(pval[0], list):
                            pval = list(set(pval))
                    else:
                        pval = list(set(pval))
                    if len(pval) == 1:
                        pval = pval[0]
                    if pkey == 'run.run_number' or pkey == 'lumi.number':
                        if isinstance(pval, basestring):
                            pval = int(pval)
            except Exception as exc:
                msg = "Fail to extract pval for pkey='%s', lkey='%s'" \
                    % (pkey, lkey)
                msg += "\npval='%s', type(pval)='%s'" % (pval, type(pval))
                print(msg)
                print_exc(exc)
                pval = 'N/A'
            try:
                if not filters:
                    if pkey == 'summary':
                        page += 'Summary information:'
                    elif pval and pval != 'N/A':
                        page += '%s: ' % lkey.capitalize()
                        if lkey == 'parent' or lkey == 'child':
                            if str(pval).find('.root') != -1:
                                lkey = 'file'
                            else:
                                lkey = 'dataset'
                        if lkey in not_to_link():
                            page += '%s' % pval
                        elif isinstance(pval, list):
                            page += ', '.join(['<span class="highlight">'
                                + '<a href="/das/request?%s">%s</a></span>'
                                % (make_args(lkey, i, inst), i) for i in pval])
                        else:
                            args = make_args(lkey, pval, inst)
                            page += '<span class="highlight">' \
                                + '<a href="/das/request?%s">%s</a></span>' \
                                % (args, pval)
                    else:
                        page += '%s: N/A' % lkey.capitalize()
                plist = self.dasmgr.mapping.presentation(lkey)
                linkrec = None
                for item in plist:
                    if 'link' in item:
                        linkrec = item['link']
                        break
                if linkrec and pval and pval != 'N/A' and \
                        not isinstance(pval, list) and not error:
                    links += [l for l in make_links(linkrec, pval, inst)]
                if pkey and pkey == 'file.name':
                    try:
                        lfn = DotDict(row).get('file.name')
                        val = '<a href="/das/download?lfn=%s">Download</a>' \
                            % lfn if lfn else ''
                        if val:
                            links.append(val)
                    except:
                        pass
                if pkey and pkey == 'site.name':
                    try:
                        site = DotDict(row).get('site.name')
                        val = self.templatepage(
                            'sitedb', item=site, api="sites") if site else ''
                        if val:
                            links.append(val)
                    except:
                        pass
                if pkey and pkey == 'user.name':
                    try:
                        user = DotDict(row).get('user.username')
                        val = self.templatepage(
                            'sitedb', item=user, api="people") if user else ''
                        if val:
                            links.append(val)
                    except:
                        pass
                if pkey and pkey == 'dataset.name':
                    try:
                        path = DotDict(row).get('dataset.name')
                        if path:
                            links.append(self.templatepage(
                                'makepy', path=path, inst=inst))
                            if inst == self.dbs_global:
                                links.append(self.templatepage(
                                    'phedex_subscription', path=path))
                                links.append(self.templatepage(
                                    'xsecdb', primds=path.split('/')[1]))
                    except:
                        pass
                if pkey and pkey == 'release.name':
                    rel = '["%s"]' % DotDict(row).get('release.name')
                    url = 'https://cmstags.cern.ch/tc/py_getReleasesTags?'
                    url += 'diff=false&releases=%s' % urllib.quote(rel)
                    links.append('<a href="%s">Packages</a>' % url)
            except Exception as exc:
                print_exc(exc)
                pval = 'N/A'
        gen = self.convert2ui(row, pkey)
        if self.dasmgr:
            func = self.dasmgr.mapping.daskey_from_presentation
            if filters and not aggrtrs:
                page += add_filter_values(row, filters)
            else:
                page += adjust_values(func, gen, links, pkey)
        pad = ""
        try:
            if 'das' in row and 'system' in row['das']:
                systems = self.systems(row['das']['system'])
            else:
                systems = ""  # no das record
                print(dastimestamp('DAS ERROR '),
                      'record without DAS key', row)
        except KeyError as exc:
            print_exc(exc)
            systems = ""  # we don't store systems for aggregated records
        except Exception as exc:
            print_exc(exc)
            systems = ""  # we don't store systems for aggregated records
        jsonhtml = das_json(dasquery, row, pad)
        jsonhtml = jsonhtml.replace(
            'request?', 'request?instance=%s&' % inst)
        if not links:
            page += '<br />'
        if 'das' in row and 'conflict' in row['das']:
            conflict = ', '.join(row['das']['conflict'])
        else:
            conflict = ''
        hints = ''
        for hint in row.get('hints', {}):
            if hint:
                hints += self.templatepage('hint', hint=hint,
                                           base=self.base,
                                           dbs=self.dbs_global)
        page += self.templatepage('das_row', systems=systems,
                                  sanitized_data=jsonhtml, id=mongo_id,
                                  rec_id=mongo_id, conflict=conflict,
                                  hints=hints)
        page += '</div>'
        old = row
    main += fltpage
    if dup and not dasquery.aggregators:
        main += self.templatepage('das_duplicates', uinput=uinput,
                                  instance=inst)
    main += page
    if total > 10:
        main += '<hr class="line" />'
        main += pager
        main += '<hr class="line" />'
    proc_time = self.processing_time(dasquery)
    if proc_time:
        msg = 'processing time: %5.3f sec, ' % proc_time
    else:
        msg = ''
    msg += 'cache server time: %5.3f sec' % head['ctime']
    main += '<div align="right">%s</div>' % msg
    return main