def match_value_dataset(kwd, dbs_inst=None):
    """
    Return keyword matches to dataset values in dbsmanager.

    :param kwd: keyword to look up; a byte string is converted to unicode
    :param dbs_inst: DBS instance to search in; when it is not given the
        one attached to the current request (``request.dbs_inst``) is used
    :return: ``(None, None)`` when no DBS instance is available, otherwise
        a tuple ``(score, match)`` where ``match`` is a dict with the keys
        ``'map_to'`` and ``'adjusted_keyword'``. ``score`` is ``None`` when
        the keyword did not qualify as a dataset match.
    """
    # if no specific dbs_inst passed, get the current one from request
    if not dbs_inst:
        if not hasattr(request, 'dbs_inst'):
            return None, None
        dbs_inst = request.dbs_inst

    # make sure the kwd is unicode
    if not isinstance(kwd, unicode) and isinstance(kwd, str):
        kwd = unicode(kwd)

    dataset_score = None
    upd_kwd = kwd

    # find_datasets returns a generator, check if it's non empty
    if next(find_datasets(kwd, dbs_inst, limit=1), False):
        if DEBUG:
            print('Dataset matched by keyword %s' % kwd)

        has_wildcard = '*' in kwd
        has_slash = '/' in kwd
        if has_wildcard:
            # if kw contains wildcards the score shall be a bit lower
            dataset_score = 0.9 if has_slash else 0.8
        elif has_slash:
            dataset_score = 1.0
        else:
            # bare term: accept it only if it matches as a substring
            wildcarded = '*%s*' % kwd
            if next(find_datasets(wildcarded, dbs_inst, limit=1), False):
                dataset_score = 0.7
                upd_kwd = wildcarded

        # prevent number-only-keywords to be matched into datasets;
        # the score may still be None here (bare term without substring
        # match), in which case there is nothing to penalise
        if dataset_score is not None and kwd.isnumeric():
            dataset_score -= 0.3

        # add extra wildcard to make sure the query will work...
        if not RE_3SLAHES.match(upd_kwd):
            # decide on the trailing wildcard before a leading one is added
            needs_trailing = not upd_kwd.endswith('*')
            if not upd_kwd.startswith(('*', '/')):
                upd_kwd = '*' + upd_kwd
            if needs_trailing:
                upd_kwd += '*'

    return dataset_score, {'map_to': 'dataset.name',
                           'adjusted_keyword': upd_kwd}
def __init__(self, query, **flags):
    """
    Initialize the query object from one of the supported inputs.

    ``query`` may be a string (DAS input query, parsed through
    ``self.mongoparser``), a dict (treated as a mongo query when its
    ``'spec'`` entry is a dict, otherwise as a storage query) or another
    object whose class is named ``DASQuery`` (its query, hashes, mongo
    and storage queries are taken over). Any other type raises
    ``Exception``.

    Every entry of ``flags`` is stored on the instance as ``_<name>``,
    e.g. filters, aggregators, system, instance, etc.
    """
    # parser and lookup tables
    self._mongoparser = None
    self._params = {}
    self._service_apis_map = {}
    # textual forms of the query
    self._str = ''
    self._query = ''
    self._query_full = ''
    # structured forms of the query
    self._storage_query = {}
    self._mongo_query = {}
    # hashes
    self._qhash = None
    self._hashes = None
    # attributes which flags may override
    self._system = None
    self._instance = None
    self._loose_query = None
    self._pattern_query = None
    self._sortkeys = []
    self._filters = {}
    self._mapreduce = []
    self._aggregators = []
    self._flags = flags

    # flags win over the defaults assigned above
    for name, value in flags.iteritems():
        setattr(self, '_%s' % name, value)

    # pick the initialization which fits the data type of the input
    if isinstance(query, basestring):
        self._query = query
        try:
            self._mongo_query = self.mongoparser.parse(query)
            # copy cacheable flags the parsed query does not define itself
            for name, value in flags.iteritems():
                if name not in self.NON_CACHEABLE_FLAGS and \
                        name not in self._mongo_query:
                    self._mongo_query[name] = value
        except Exception as exp:
            msg = "Fail to parse DAS query='%s', %s" % (query, str(exp))
            print_exc(msg, print_traceback=True)
            raise exp
    elif isinstance(query, dict):
        # work on a shallow copy of the caller's dict
        query_copy = dict(query.iteritems())
        if isinstance(query_copy.get('spec'), dict):
            self._mongo_query = query_copy  # mongo query
        else:
            self._storage_query = query_copy  # storage query
    elif (isinstance(query, object) and hasattr(query, '__class__')
          and query.__class__.__name__ == 'DASQuery'):
        self._query = query.query
        self._hashes = query.hashes
        self._mongo_query = query.mongo_query
        self._storage_query = query.storage_query
    else:
        raise Exception('Unsupported data type of DAS query')

    self.update_attr()

    # check dataset wild-cards
    for key, val in self._mongo_query['spec'].items():
        # only match dataset.name but do not primary_dataset.name
        if key != 'dataset.name':
            continue
        # NOTE(review): val goes straight into a regex match, which
        # assumes it is a string -- confirm it is never an operator dict
        if RE_3SLAHES.match(val):
            continue
        # TODO: we currently do not support wildcard matching
        #       from command line interface
        if not self._instance:
            continue
        # apply 3 slash pattern look-up, continuing only if a single
        # interpretation exists here, ticket #3071
        self._handle_dataset_slashes(key, val)