def get_keypair_matches(self, location, query, candidates):
    # Match colon-separated key:value fields (e.g. identifiers) against a
    # query of the form "key:value" (each part with its own match kind) or
    # a bare value-only query. Returns the subset of ``candidates`` ids
    # whose field at ``location`` matches.
    matches = set([])
    if query.find(':') >= 0:
        # Split the query into key and value sub-queries.
        q = [q.strip() for q in query.split(':')]
        if len(q) != 2:
            raise ParseException(
                _('Invalid query format for colon-separated search: {0}').format(query))
        (keyq, valq) = q
        keyq_mkind, keyq = self._matchkind(keyq)
        valq_mkind, valq = self._matchkind(valq)
    else:
        # No colon: the whole query applies to the value part only.
        keyq = keyq_mkind = ''
        valq_mkind, valq = self._matchkind(query)
    loc = self.field_metadata[location]['rec_index']
    # Separator used when the field stores multiple pairs in one string.
    split_char = self.field_metadata[location]['is_multiple'].get(
        'cache_to_list', ',')
    for id_ in candidates:
        item = self._data[id_]
        if item is None:
            continue
        if item[loc] is None:
            # An empty field matches only an explicit 'false' value query.
            if valq == 'false':
                matches.add(id_)
            continue
        # 'false' means "no pair matched"; track that across all pairs.
        add_if_nothing_matches = valq == 'false'
        pairs = [p.strip() for p in item[loc].split(split_char)]
        for pair in pairs:
            parts = pair.split(':')
            if len(parts) != 2:
                # malformed pair, skip it
                continue
            # Slices keep these as one-element lists, the form _match expects.
            k = parts[:1]
            v = parts[1:]
            if keyq and not _match(keyq, k, keyq_mkind,
                                   use_primary_find_in_search=pref_use_primary_find_in_search):
                continue
            if valq:
                if valq == 'true':
                    if not v:
                        continue
                elif valq == 'false':
                    if v:
                        # a pair matched the key, so 'false' cannot apply
                        add_if_nothing_matches = False
                        continue
                elif not _match(valq, v, valq_mkind,
                                use_primary_find_in_search=pref_use_primary_find_in_search):
                    continue
            matches.add(id_)
        if add_if_nothing_matches:
            matches.add(id_)
    return matches
def get_matches(self, location, query, candidates=None, allow_recursion=True):
    '''
    Evaluate one search term against the candidate book ids and return
    the set of matching ids.

    :param location: search-term name (or 'all' / a grouped '@...' term)
    :param query: search text, possibly carrying a match-kind prefix
        understood by self._matchkind()
    :param candidates: set of ids to restrict the search to; searched as a
        copy and never modified. None means search the universal set.
    :param allow_recursion: guards grouped search terms against recursive
        query groups (raises ParseException when exceeded)
    '''
    # If candidates is not None, it must not be modified. Changing its
    # value will break query optimization in the search parser
    matches = set([])
    if candidates is None:
        candidates = self.universal_set()
    if len(candidates) == 0:
        return matches
    if location not in self.all_search_locations:
        return matches

    if len(location) > 2 and location.startswith('@') and \
            location[1:] in self.db_prefs['grouped_search_terms']:
        location = location[1:]

    if query and query.strip():
        # get metadata key associated with the search term. Eliminates
        # dealing with plurals and other aliases
        original_location = location
        location = self.field_metadata.search_term_to_field_key(icu_lower(location.strip()))
        # grouped search terms
        if isinstance(location, list):
            if allow_recursion:
                # 'false' on a group means: in none of the member fields,
                # so compute 'true' and invert at the end.
                if query.lower() == 'false':
                    invert = True
                    query = 'true'
                else:
                    invert = False
                for loc in location:
                    c = candidates.copy()
                    m = self.get_matches(loc, query, candidates=c, allow_recursion=False)
                    matches |= m
                    c -= m
                    if len(c) == 0:
                        break
                if invert:
                    matches = self.universal_set() - matches
                return matches
            raise ParseException(_('Recursive query group detected: {0}').format(query))

        # apply the limit if appropriate
        if location == 'all' and prefs['limit_search_columns'] and \
                prefs['limit_search_columns_to']:
            terms = set([])
            for l in prefs['limit_search_columns_to']:
                l = icu_lower(l.strip())
                if l and l != 'all' and l in self.all_search_locations:
                    terms.add(l)
            if terms:
                c = candidates.copy()
                for l in terms:
                    try:
                        m = self.get_matches(l, query,
                                             candidates=c, allow_recursion=allow_recursion)
                        matches |= m
                        c -= m
                        if len(c) == 0:
                            break
                    except:
                        # best effort: a column that cannot be searched is skipped
                        pass
                return matches

        if location in self.field_metadata:
            fm = self.field_metadata[location]
            # take care of dates special case
            if fm['datatype'] == 'datetime' or \
                    (fm['datatype'] == 'composite' and
                     fm['display'].get('composite_sort', '') == 'date'):
                return self.get_dates_matches(location, query.lower(), candidates)

            # take care of numbers special case
            if fm['datatype'] in ('rating', 'int', 'float') or \
                    (fm['datatype'] == 'composite' and
                     fm['display'].get('composite_sort', '') == 'number'):
                return self.get_numeric_matches(location, query.lower(), candidates)

            if fm['datatype'] == 'bool':
                return self.get_bool_matches(location, query, candidates)

            # take care of the 'count' operator for is_multiples.
            # BUG FIX: the guard used query[1:1], which is always the empty
            # string, and '' in '=<>!' is True -- so every '#'-prefixed query
            # was treated as a count query. query[1:2] tests the actual
            # character after '#'.
            if fm['is_multiple'] and \
                    len(query) > 1 and query.startswith('#') and \
                    query[1:2] in '=<>!':
                vf = lambda item, loc=fm['rec_index'], \
                        ms=fm['is_multiple']['cache_to_list']: \
                    len(item[loc].split(ms)) if item[loc] is not None else 0
                return self.get_numeric_matches(location, query[1:],
                                                candidates, val_func=vf)

            # special case: colon-separated fields such as identifiers. isbn
            # is a special case within the case
            if fm.get('is_csp', False):
                if location == 'identifiers' and original_location == 'isbn':
                    return self.get_keypair_matches('identifiers',
                                                    '=isbn:'+query, candidates)
                return self.get_keypair_matches(location, query, candidates)

        # check for user categories
        if len(location) >= 2 and location.startswith('@'):
            return self.get_user_category_matches(location[1:], query.lower(), candidates)

        # everything else, or 'all' matches
        matchkind, query = self._matchkind(query)
        if not isinstance(query, unicode):
            query = query.decode('utf-8')

        db_col = {}
        exclude_fields = []  # fields to not check when matching against text.
        col_datatype = []
        is_multiple_cols = {}
        for x in range(len(self.FIELD_MAP)):
            col_datatype.append('')
        for x in self.field_metadata:
            if x.startswith('@'):
                continue
            if len(self.field_metadata[x]['search_terms']):
                db_col[x] = self.field_metadata[x]['rec_index']
                if self.field_metadata[x]['datatype'] not in \
                        ['composite', 'text', 'comments', 'series', 'enumeration']:
                    exclude_fields.append(db_col[x])
                col_datatype[db_col[x]] = self.field_metadata[x]['datatype']
                is_multiple_cols[db_col[x]] = \
                    self.field_metadata[x]['is_multiple'].get('cache_to_list', None)

        try:
            # ratings are stored doubled (half stars), hence * 2
            rating_query = int(query) * 2
        except:
            rating_query = None

        location = [location] if location != 'all' else list(db_col.keys())

        for i, loc in enumerate(location):
            location[i] = db_col[loc]

        current_candidates = candidates.copy()
        for loc in location:  # location is now an array of field indices
            if loc == db_col['authors']:
                # DB stores authors with commas changed to bars, so change query
                if matchkind == REGEXP_MATCH:
                    q = query.replace(',', r'\|')
                else:
                    q = query.replace(',', '|')
            elif loc == db_col['languages']:
                q = canonicalize_lang(query)
                if q is None:
                    # fall back to reverse-mapping a localized language name
                    lm = lang_map()
                    rm = {v.lower():k for k,v in lm.iteritems()}
                    q = rm.get(query, query)
            else:
                q = query

            for id_ in current_candidates:
                item = self._data[id_]
                if item is None:
                    continue

                if not item[loc]:
                    if q == 'false' and matchkind == CONTAINS_MATCH:
                        matches.add(item[0])
                    continue  # item is empty. No possible matches below
                if q == 'false' and matchkind == CONTAINS_MATCH:
                    # Field has something in it, so a false query does not match
                    continue

                if q == 'true' and matchkind == CONTAINS_MATCH:
                    if isinstance(item[loc], basestring):
                        if item[loc].strip() == '':
                            continue
                    matches.add(item[0])
                    continue

                if col_datatype[loc] == 'rating':  # get here if 'all' query
                    if rating_query and rating_query == int(item[loc]):
                        matches.add(item[0])
                    continue

                try:  # a conversion below might fail
                    # relationals are not supported in 'all' queries
                    if col_datatype[loc] == 'float':
                        if float(query) == item[loc]:
                            matches.add(item[0])
                        continue
                    if col_datatype[loc] == 'int':
                        if int(query) == item[loc]:
                            matches.add(item[0])
                        continue
                except:
                    # A conversion threw an exception. Because of the type,
                    # no further match is possible
                    continue

                if loc not in exclude_fields:  # time for text matching
                    if is_multiple_cols[loc] is not None:
                        vals = [v.strip() for v in item[loc].split(is_multiple_cols[loc])]
                    else:
                        vals = [item[loc]]  # make into list to make _match happy
                    if _match(q, vals, matchkind,
                              use_primary_find_in_search=pref_use_primary_find_in_search):
                        matches.add(item[0])
                        continue
            # ids already matched need not be rescanned for later fields
            current_candidates -= matches
    return matches
def get_matches(self, location, query):
    '''Return the set of stores in self.srs matching query at location.'''
    location = location.lower().strip()
    if location == 'formats':
        location = 'format'

    # Optional operator prefix: \ escapes, = exact match, ~ regexp.
    kind = CONTAINS_MATCH
    if len(query) > 1:
        head, rest = query[0], query[1:]
        if head == '\\':
            query = rest
        elif head == '=':
            kind, query = EQUALS_MATCH, rest
        elif head == '~':
            kind, query = REGEXP_MATCH, rest
    if kind != REGEXP_MATCH:
        # leave case in regexps because it can be significant e.g. \S \W \D
        query = query.lower()

    if location not in self.USABLE_LOCATIONS:
        return set()
    found = set()
    locations = set(self.USABLE_LOCATIONS) - {'all'} if location == 'all' else [location]

    getters = {
        'affiliate': lambda x: x.affiliate,
        'description': lambda x: x.description.lower(),
        'drm': lambda x: not x.drm_free_only,
        'enabled': lambda x: not is_disabled(x),
        'format': lambda x: ','.join(x.formats).lower(),
        'headquarters': lambda x: x.headquarters.lower(),
        'name': lambda x: x.name.lower(),
    }
    getters['formats'] = getters['format']

    bool_fields = ('affiliate', 'drm', 'enabled')
    upf = prefs['use_primary_find_in_search']
    for sr in self.srs:
        for locvalue in locations:
            getter = getters[locvalue]
            if query == 'true':
                # bool fields must be True; other fields just need a value
                if locvalue in bool_fields:
                    if getter(sr) == True:  # noqa
                        found.add(sr)
                elif getter(sr) is not None:
                    found.add(sr)
                continue
            if query == 'false':
                if locvalue in bool_fields:
                    if getter(sr) == False:  # noqa
                        found.add(sr)
                elif getter(sr) is None:
                    found.add(sr)
                continue
            if locvalue in bool_fields:
                continue  # bool fields cannot match a text query
            try:
                # Can't separate authors because comma is used for name sep and author sep.
                # Exact match might not get what you want. For that reason, turn name
                # exactmatch searches into contains searches.
                m = CONTAINS_MATCH if (locvalue == 'name' and kind == EQUALS_MATCH) else kind
                vals = getter(sr).split(',') if locvalue == 'format' else [getter(sr)]
                if _match(query, vals, m, use_primary_find_in_search=upf):
                    found.add(sr)
                    break
            except ValueError:  # Unicode errors
                import traceback
                traceback.print_exc()
    return found
def get_matches(self, location, query):
    '''Return the set of results in self.srs matching query at location.'''
    location = location.lower().strip()
    # normalise plural aliases handled specially
    if location == 'authors':
        location = 'author'
    elif location == 'formats':
        location = 'format'

    # Optional operator prefix: \ escapes, = exact match, ~ regexp.
    mode = CONTAINS_MATCH
    if len(query) > 1:
        head, rest = query[0], query[1:]
        if head == '\\':
            query = rest
        elif head == '=':
            mode, query = EQUALS_MATCH, rest
        elif head == '~':
            mode, query = REGEXP_MATCH, rest
    if mode != REGEXP_MATCH:
        # leave case in regexps because it can be significant e.g. \S \W \D
        query = query.lower()

    if location not in self.USABLE_LOCATIONS:
        return set([])

    results = set([])
    locations = set(self.USABLE_LOCATIONS) - set(['all']) if location == 'all' else [location]

    getters = {
        'author': lambda x: x.author.lower(),
        'format': attrgetter('formats'),
        'title': lambda x: x.title.lower(),
    }
    getters['authors'] = getters['author']
    getters['formats'] = getters['format']

    upf = prefs['use_primary_find_in_search']
    for sr in self.srs:
        for locvalue in locations:
            getter = getters[locvalue]
            # 'true'/'false' queries test presence/absence of a value
            if query == 'true':
                if getter(sr) is not None:
                    results.add(sr)
                continue
            if query == 'false':
                if getter(sr) is None:
                    results.add(sr)
                continue
            try:
                # Can't separate authors because comma is used for name sep and author sep.
                # Exact match might not get what you want. For that reason, turn author
                # exactmatch searches into contains searches.
                m = CONTAINS_MATCH if (locvalue == 'author' and mode == EQUALS_MATCH) else mode
                if _match(query, [getter(sr)], m, use_primary_find_in_search=upf):
                    results.add(sr)
                    break
            except ValueError:  # Unicode errors
                import traceback
                traceback.print_exc()
    return results
def get_matches(self, location, query):
    '''Return the set of stores in self.srs whose field at location matches query.'''
    location = location.lower().strip()
    if location == 'formats':
        location = 'format'

    # Strip an optional leading operator: \ escapes, = exact, ~ regexp.
    mode = CONTAINS_MATCH
    if len(query) > 1:
        if query[0] == '\\':
            query = query[1:]
        elif query[0] == '=':
            mode = EQUALS_MATCH
            query = query[1:]
        elif query[0] == '~':
            mode = REGEXP_MATCH
            query = query[1:]
    # Leave case alone in regexps because it can be significant e.g. \S \W \D
    if mode != REGEXP_MATCH:
        query = query.lower()

    if location not in self.USABLE_LOCATIONS:
        return set([])

    results = set([])
    every_loc = set(self.USABLE_LOCATIONS) - set(['all'])
    locations = every_loc if location == 'all' else [location]

    accessors = {
        'affiliate': lambda x: x.affiliate,
        'description': lambda x: x.description.lower(),
        'drm': lambda x: not x.drm_free_only,
        'enabled': lambda x: not is_disabled(x),
        'format': lambda x: ','.join(x.formats).lower(),
        'headquarters': lambda x: x.headquarters.lower(),
        'name': lambda x: x.name.lower(),
    }
    accessors['formats'] = accessors['format']

    booleans = ('affiliate', 'drm', 'enabled')
    upf = prefs['use_primary_find_in_search']
    for sr in self.srs:
        for locvalue in locations:
            fetch = accessors[locvalue]
            if query == 'true':
                # bool fields need an actual True; others any non-None value
                if locvalue in booleans:
                    if fetch(sr) == True:
                        results.add(sr)
                elif fetch(sr) is not None:
                    results.add(sr)
                continue
            if query == 'false':
                if locvalue in booleans:
                    if fetch(sr) == False:
                        results.add(sr)
                elif fetch(sr) is None:
                    results.add(sr)
                continue
            if locvalue in booleans:
                continue  # bool, so cannot match the text path below
            try:
                # Can't separate authors because comma is used for name sep and author sep.
                # Exact match might not get what you want. For that reason, turn name
                # exactmatch searches into contains searches.
                m = CONTAINS_MATCH if (locvalue == 'name' and mode == EQUALS_MATCH) else mode
                vals = fetch(sr).split(',') if locvalue == 'format' else [fetch(sr)]
                if _match(query, vals, m, use_primary_find_in_search=upf):
                    results.add(sr)
                    break
            except ValueError:  # Unicode errors
                import traceback
                traceback.print_exc()
    return results
def get_matches(self, location, query):
    '''Return the set of results in self.srs matching query at location.'''
    location = location.lower().strip()
    # map plural aliases to their canonical names
    aliases = {'authors': 'author', 'formats': 'format'}
    location = aliases.get(location, location)

    mode = CONTAINS_MATCH
    if len(query) > 1:
        if query.startswith('\\'):
            query = query[1:]
        elif query.startswith('='):
            mode = EQUALS_MATCH
            query = query[1:]
        elif query.startswith('~'):
            mode = REGEXP_MATCH
            query = query[1:]
    if mode != REGEXP_MATCH:
        # leave case in regexps because it can be significant e.g. \S \W \D
        query = query.lower()

    if location not in self.USABLE_LOCATIONS:
        return set([])

    results = set([])
    locations = set(self.USABLE_LOCATIONS) - {'all'} if location == 'all' else [location]

    accessors = {
        'author': lambda x: x.author.lower(),
        'format': attrgetter('formats'),
        'title': lambda x: x.title.lower(),
    }
    for base in ('author', 'format'):
        accessors[base + 's'] = accessors[base]

    upf = prefs['use_primary_find_in_search']
    for sr in self.srs:
        for locvalue in locations:
            fetch = accessors[locvalue]
            # 'true'/'false' queries test presence/absence of a value
            if query == 'true':
                if fetch(sr) is not None:
                    results.add(sr)
                continue
            if query == 'false':
                if fetch(sr) is None:
                    results.add(sr)
                continue
            try:
                # Can't separate authors because comma is used for name sep and author sep.
                # Exact match might not get what you want. For that reason, turn author
                # exactmatch searches into contains searches.
                m = CONTAINS_MATCH if (locvalue == 'author' and mode == EQUALS_MATCH) else mode
                if _match(query, [fetch(sr)], m, use_primary_find_in_search=upf):
                    results.add(sr)
                    break
            except ValueError:  # Unicode errors
                import traceback
                traceback.print_exc()
    return results
def get_matches(self, location, query, candidates=None, allow_recursion=True):
    """
    Evaluate one search term against the candidate book ids and return
    the set of matching ids.

    :param location: search-term name (or "all" / a grouped "@..." term)
    :param query: search text, possibly carrying a match-kind prefix
        understood by self._matchkind()
    :param candidates: set of ids to restrict the search to; searched via a
        copy and never modified. None means search the universal set.
    :param allow_recursion: guards grouped search terms against recursive
        query groups (raises ParseException when violated)
    """
    # If candidates is not None, it must not be modified. Changing its
    # value will break query optimization in the search parser
    matches = set([])
    if candidates is None:
        candidates = self.universal_set()
    if len(candidates) == 0:
        return matches
    if location not in self.all_search_locations:
        return matches

    if len(location) > 2 and location.startswith("@") and location[1:] in self.db_prefs["grouped_search_terms"]:
        location = location[1:]

    if query and query.strip():
        # get metadata key associated with the search term. Eliminates
        # dealing with plurals and other aliases
        original_location = location
        location = self.field_metadata.search_term_to_field_key(icu_lower(location.strip()))
        # grouped search terms
        if isinstance(location, list):
            if allow_recursion:
                # "false" on a group means: in none of the member fields,
                # so compute "true" and invert at the end.
                if query.lower() == "false":
                    invert = True
                    query = "true"
                else:
                    invert = False
                for loc in location:
                    c = candidates.copy()
                    m = self.get_matches(loc, query, candidates=c, allow_recursion=False)
                    matches |= m
                    c -= m
                    if len(c) == 0:
                        break
                if invert:
                    matches = self.universal_set() - matches
                return matches
            raise ParseException(_("Recursive query group detected: {0}").format(query))

        # apply the limit if appropriate
        if location == "all" and prefs["limit_search_columns"] and prefs["limit_search_columns_to"]:
            terms = set([])
            for l in prefs["limit_search_columns_to"]:
                l = icu_lower(l.strip())
                if l and l != "all" and l in self.all_search_locations:
                    terms.add(l)
            if terms:
                c = candidates.copy()
                for l in terms:
                    try:
                        m = self.get_matches(l, query, candidates=c, allow_recursion=allow_recursion)
                        matches |= m
                        c -= m
                        if len(c) == 0:
                            break
                    except:
                        # best effort: a column that cannot be searched is skipped
                        pass
                return matches

        if location in self.field_metadata:
            fm = self.field_metadata[location]
            # take care of dates special case
            if fm["datatype"] == "datetime" or (
                fm["datatype"] == "composite" and fm["display"].get("composite_sort", "") == "date"
            ):
                return self.get_dates_matches(location, query.lower(), candidates)

            # take care of numbers special case
            if fm["datatype"] in ("rating", "int", "float") or (
                fm["datatype"] == "composite" and fm["display"].get("composite_sort", "") == "number"
            ):
                return self.get_numeric_matches(location, query.lower(), candidates)

            if fm["datatype"] == "bool":
                return self.get_bool_matches(location, query, candidates)

            # take care of the 'count' operator for is_multiples.
            # BUG FIX: the guard used query[1:1], which is always the empty
            # string, and '' in "=<>!" is True -- so every '#'-prefixed query
            # was treated as a count query. query[1:2] tests the actual
            # character after '#'.
            if fm["is_multiple"] and len(query) > 1 and query.startswith("#") and query[1:2] in "=<>!":
                vf = lambda item, loc=fm["rec_index"], ms=fm["is_multiple"]["cache_to_list"]: (
                    len(item[loc].split(ms)) if item[loc] is not None else 0
                )
                return self.get_numeric_matches(location, query[1:], candidates, val_func=vf)

            # special case: colon-separated fields such as identifiers. isbn
            # is a special case within the case
            if fm.get("is_csp", False):
                if location == "identifiers" and original_location == "isbn":
                    return self.get_keypair_matches("identifiers", "=isbn:" + query, candidates)
                return self.get_keypair_matches(location, query, candidates)

        # check for user categories
        if len(location) >= 2 and location.startswith("@"):
            return self.get_user_category_matches(location[1:], query.lower(), candidates)

        # everything else, or 'all' matches
        matchkind, query = self._matchkind(query)
        if not isinstance(query, unicode):
            query = query.decode("utf-8")

        db_col = {}
        exclude_fields = []  # fields to not check when matching against text.
        col_datatype = []
        is_multiple_cols = {}
        for x in range(len(self.FIELD_MAP)):
            col_datatype.append("")
        for x in self.field_metadata:
            if x.startswith("@"):
                continue
            if len(self.field_metadata[x]["search_terms"]):
                db_col[x] = self.field_metadata[x]["rec_index"]
                if self.field_metadata[x]["datatype"] not in [
                    "composite",
                    "text",
                    "comments",
                    "series",
                    "enumeration",
                ]:
                    exclude_fields.append(db_col[x])
                col_datatype[db_col[x]] = self.field_metadata[x]["datatype"]
                is_multiple_cols[db_col[x]] = self.field_metadata[x]["is_multiple"].get("cache_to_list", None)

        try:
            # ratings are stored doubled (half stars), hence * 2
            rating_query = int(query) * 2
        except:
            rating_query = None

        location = [location] if location != "all" else list(db_col.keys())

        for i, loc in enumerate(location):
            location[i] = db_col[loc]

        current_candidates = candidates.copy()
        for loc in location:  # location is now an array of field indices
            if loc == db_col["authors"]:
                # DB stores authors with commas changed to bars, so change query
                if matchkind == REGEXP_MATCH:
                    q = query.replace(",", r"\|")
                else:
                    q = query.replace(",", "|")
            elif loc == db_col["languages"]:
                q = canonicalize_lang(query)
                if q is None:
                    # fall back to reverse-mapping a localized language name
                    lm = lang_map()
                    rm = {v.lower(): k for k, v in lm.iteritems()}
                    q = rm.get(query, query)
            else:
                q = query

            for id_ in current_candidates:
                item = self._data[id_]
                if item is None:
                    continue

                if not item[loc]:
                    if q == "false" and matchkind == CONTAINS_MATCH:
                        matches.add(item[0])
                    continue  # item is empty. No possible matches below
                if q == "false" and matchkind == CONTAINS_MATCH:
                    # Field has something in it, so a false query does not match
                    continue

                if q == "true" and matchkind == CONTAINS_MATCH:
                    if isinstance(item[loc], basestring):
                        if item[loc].strip() == "":
                            continue
                    matches.add(item[0])
                    continue

                if col_datatype[loc] == "rating":  # get here if 'all' query
                    if rating_query and rating_query == int(item[loc]):
                        matches.add(item[0])
                    continue

                try:  # a conversion below might fail
                    # relationals are not supported in 'all' queries
                    if col_datatype[loc] == "float":
                        if float(query) == item[loc]:
                            matches.add(item[0])
                        continue
                    if col_datatype[loc] == "int":
                        if int(query) == item[loc]:
                            matches.add(item[0])
                        continue
                except:
                    # A conversion threw an exception. Because of the type,
                    # no further match is possible
                    continue

                if loc not in exclude_fields:  # time for text matching
                    if is_multiple_cols[loc] is not None:
                        vals = [v.strip() for v in item[loc].split(is_multiple_cols[loc])]
                    else:
                        vals = [item[loc]]  # make into list to make _match happy
                    if _match(q, vals, matchkind, use_primary_find_in_search=pref_use_primary_find_in_search):
                        matches.add(item[0])
                        continue
            # ids already matched need not be rescanned for later fields
            current_candidates -= matches
    return matches