def get_bool_matches(self, location, query, candidates):
    '''
    Return the ids of the candidate records whose boolean field at
    *location* matches *query*.

    In two-state mode an unset value collapses into "no".  In tristate
    mode unset ("empty"), no and yes are all distinct, and 'true'
    matches any explicitly set value, whether yes or no.
    Raises ParseException if *query* is not a recognised boolean word.
    '''
    tristate = self.db_prefs.get('bools_are_tristate')
    rec_idx = self.field_metadata[location]['rec_index']
    query = icu_lower(query)
    if query not in self.local_bool_values:
        raise ParseException(_('Invalid boolean query "{0}"').format(query))
    found = set()
    for book_id in candidates:
        rec = self._data[book_id]
        if rec is None:
            continue
        val = force_to_bool(rec[rec_idx])
        if tristate:
            if val is None:
                # undefined value: only "empty"-style queries match
                accepted = (self.local_empty, self.local_blank, '_empty', 'false')
            elif not val:
                # explicitly no: 'true' also matches, as the value is defined
                accepted = (self.local_no, self.local_unchecked, '_no', 'true')
            else:
                accepted = (self.local_yes, self.local_checked, '_yes', 'true')
        else:
            if val:
                accepted = (self.local_yes, self.local_checked, '_yes', 'true')
            else:
                # undefined and explicit no are equivalent in two-state mode
                accepted = (self.local_no, self.local_unchecked, '_no', 'false')
        if query in accepted:
            found.add(rec[0])
    return found
def get_keypair_matches(self, location, query, candidates):
    '''
    Search a colon-separated key:value field (e.g. identifiers) and
    return the ids of matching candidate records.

    A query of the form ``key:value`` matches key and value separately;
    a plain query matches only values.  The special value 'true' matches
    any present pair; 'false' matches records where no pair matches.
    '''
    matches = set()
    if query.find(':') >= 0:
        halves = [h.strip() for h in query.split(':')]
        if len(halves) != 2:
            raise ParseException(
                _('Invalid query format for colon-separated search: {0}').format(query))
        keyq, valq = halves
        keyq_mkind, keyq = self._matchkind(keyq)
        valq_mkind, valq = self._matchkind(valq)
    else:
        keyq = keyq_mkind = ''
        valq_mkind, valq = self._matchkind(query)

    rec_idx = self.field_metadata[location]['rec_index']
    split_char = self.field_metadata[location]['is_multiple'].get(
        'cache_to_list', ',')
    for id_ in candidates:
        rec = self._data[id_]
        if rec is None:
            continue
        if rec[rec_idx] is None:
            # a completely empty field matches only the 'false' query
            if valq == 'false':
                matches.add(id_)
            continue
        add_if_nothing_matches = (valq == 'false')
        for pair in (p.strip() for p in rec[rec_idx].split(split_char)):
            parts = pair.split(':')
            if len(parts) != 2:
                continue
            # single-element lists, as _match expects a list of values
            k = parts[:1]
            v = parts[1:]
            if keyq and not _match(keyq, k, keyq_mkind,
                    use_primary_find_in_search=pref_use_primary_find_in_search):
                continue
            if valq:
                if valq == 'true':
                    if not v:
                        continue
                elif valq == 'false':
                    if v:
                        add_if_nothing_matches = False
                        continue
                elif not _match(valq, v, valq_mkind,
                        use_primary_find_in_search=pref_use_primary_find_in_search):
                    continue
            matches.add(id_)
        if add_if_nothing_matches:
            matches.add(id_)
    return matches
def __call__(self, query, field_iter, candidates, use_primary_find):
    '''
    Search a key:value (colon-separated) field such as identifiers.

    :param query: the search expression, optionally of the form key:value
    :param field_iter: iterator yielding (value_map, book_ids) pairs, where
        value_map maps keys to values for a group of books
    :param candidates: the set of book ids being searched (used to invert
        'false' presence queries)
    :param use_primary_find: forwarded to _match() for locale-aware matching
    :return: the set of matching book ids
    :raises ParseException: if the query has more than one colon

    Fixed: removed a stray bare-name expression statement (``keyq_mkind``)
    that was a no-op.
    '''
    matches = set()
    if ':' in query:
        q = [part.strip() for part in query.split(':')]
        if len(q) != 2:
            raise ParseException(
                _('Invalid query format for colon-separated search: {0}').
                format(query))
        keyq, valq = q
        keyq_mkind, keyq = _matchkind(keyq)
        valq_mkind, valq = _matchkind(valq)
    else:
        keyq = keyq_mkind = ''
        valq_mkind, valq = _matchkind(query)

    if valq in {'true', 'false'}:
        # Presence test: 'true' matches books that have the field (or the
        # given key) at all; 'false' matches the complement.
        found = set()
        if keyq:
            for val, book_ids in field_iter():
                if val and val.get(keyq, False):
                    found |= book_ids
        else:
            for val, book_ids in field_iter():
                if val:
                    found |= book_ids
        return found if valq == 'true' else candidates - found

    for m, book_ids in field_iter():
        for key, val in m.iteritems():
            if (keyq and not _match(
                    keyq, (key, ), keyq_mkind,
                    use_primary_find_in_search=use_primary_find)):
                continue
            if (valq and not _match(
                    valq, (val, ), valq_mkind,
                    use_primary_find_in_search=use_primary_find)):
                continue
            matches |= book_ids
            # one matching pair is enough for this group of books
            break

    return matches
def __call__(self, query, field_iter, bools_are_tristate):
    '''
    Boolean search over the (value, book_ids) pairs yielded by field_iter.

    In two-state mode an unset value is treated as "no"; in tristate mode
    unset ("empty") is distinct from "no", and 'true' matches any value
    that is explicitly set, whether yes or no.
    Raises ParseException if *query* is not a recognised boolean word.
    '''
    if query not in self.local_bool_values:
        raise ParseException(
            _('Invalid boolean query "{0}"').format(query))
    yes_words = {self.local_yes, self.local_checked, 'checked', '_checked',
                 'yes', '_yes', 'true'}
    no_words = {self.local_no, self.local_unchecked, 'unchecked',
                '_unchecked', 'no', '_no'}
    empty_words = {self.local_empty, self.local_blank, 'blank', '_blank',
                   'empty', '_empty', 'false'}
    if bools_are_tristate:
        # an explicit "no" is still a defined value, so 'true' matches it
        no_words = no_words | {'true'}
    else:
        # unset collapses into "no", so 'false' matches it
        no_words = no_words | {'false'}
    matches = set()
    for raw, book_ids in field_iter():
        val = force_to_bool(raw)
        if bools_are_tristate and val is None:
            accepted = empty_words
        elif val is None or not val:
            accepted = no_words
        else:
            accepted = yes_words
        if query in accepted:
            matches |= book_ids
    return matches
def get_matches(self, location, query, candidates=None, allow_recursion=True):
    '''
    Return the set of ids of the books in candidates that match query in
    the given search location (a search-term name, 'all', or an
    @grouped-search-term).

    If candidates is not None, it must not be modified. Changing its
    value will break query optimization in the search parser.

    :raises ParseException: on a recursive grouped-search-term query
    '''
    matches = set()
    if candidates is None:
        candidates = self.universal_set()
    if len(candidates) == 0:
        return matches
    if location not in self.all_search_locations:
        return matches

    if len(location) > 2 and location.startswith('@') and \
            location[1:] in self.db_prefs['grouped_search_terms']:
        location = location[1:]

    if query and query.strip():
        # get metadata key associated with the search term. Eliminates
        # dealing with plurals and other aliases
        original_location = location
        location = self.field_metadata.search_term_to_field_key(
            icu_lower(location.strip()))
        # grouped search terms
        if isinstance(location, list):
            if allow_recursion:
                # a 'false' query against a group means "no member matches
                # 'true'", so run the inverted query and complement the result
                if query.lower() == 'false':
                    invert = True
                    query = 'true'
                else:
                    invert = False
                for loc in location:
                    c = candidates.copy()
                    m = self.get_matches(loc, query,
                                         candidates=c, allow_recursion=False)
                    matches |= m
                    c -= m
                    if len(c) == 0:
                        break
                if invert:
                    matches = self.universal_set() - matches
                return matches
            raise ParseException(
                _('Recursive query group detected: {0}').format(query))

        # apply the limit if appropriate
        if location == 'all' and prefs['limit_search_columns'] and \
                prefs['limit_search_columns_to']:
            terms = set()
            for l in prefs['limit_search_columns_to']:
                l = icu_lower(l.strip())
                if l and l != 'all' and l in self.all_search_locations:
                    terms.add(l)
            if terms:
                c = candidates.copy()
                for l in terms:
                    try:
                        m = self.get_matches(l, query, candidates=c,
                                             allow_recursion=allow_recursion)
                        matches |= m
                        c -= m
                        if len(c) == 0:
                            break
                    except:
                        # best-effort: one failing column must not kill the
                        # whole restricted search
                        pass
                return matches

        if location in self.field_metadata:
            fm = self.field_metadata[location]
            # take care of dates special case
            if fm['datatype'] == 'datetime' or \
                    (fm['datatype'] == 'composite' and
                     fm['display'].get('composite_sort', '') == 'date'):
                return self.get_dates_matches(location, query.lower(), candidates)

            # take care of numbers special case
            if fm['datatype'] in ('rating', 'int', 'float') or \
                    (fm['datatype'] == 'composite' and
                     fm['display'].get('composite_sort', '') == 'number'):
                return self.get_numeric_matches(location, query.lower(), candidates)

            if fm['datatype'] == 'bool':
                return self.get_bool_matches(location, query, candidates)

            # take care of the 'count' operator for is_multiples.
            # Bug fix: the operator test must slice query[1:2]; the previous
            # query[1:1] is always the empty string, and '' is "in" every
            # string, so any '#'-prefixed query was misread as a count query.
            if fm['is_multiple'] and \
                    len(query) > 1 and query.startswith('#') and \
                    query[1:2] in '=<>!':
                vf = lambda item, loc=fm['rec_index'], \
                        ms=fm['is_multiple']['cache_to_list']: \
                    len(item[loc].split(ms)) if item[loc] is not None else 0
                return self.get_numeric_matches(location, query[1:],
                                                candidates, val_func=vf)

            # special case: colon-separated fields such as identifiers. isbn
            # is a special case within the case
            if fm.get('is_csp', False):
                if location == 'identifiers' and original_location == 'isbn':
                    return self.get_keypair_matches('identifiers',
                                                    '=isbn:'+query, candidates)
                return self.get_keypair_matches(location, query, candidates)

        # check for user categories
        if len(location) >= 2 and location.startswith('@'):
            return self.get_user_category_matches(location[1:], query.lower(),
                                                  candidates)

        # everything else, or 'all' matches
        matchkind, query = self._matchkind(query)
        if not isinstance(query, unicode):
            query = query.decode('utf-8')

        db_col = {}
        exclude_fields = []  # fields to not check when matching against text.
        col_datatype = []
        is_multiple_cols = {}
        for x in range(len(self.FIELD_MAP)):
            col_datatype.append('')
        for x in self.field_metadata:
            if x.startswith('@'):
                continue
            if len(self.field_metadata[x]['search_terms']):
                db_col[x] = self.field_metadata[x]['rec_index']
                if self.field_metadata[x]['datatype'] not in \
                        ['composite', 'text', 'comments', 'series', 'enumeration']:
                    exclude_fields.append(db_col[x])
                col_datatype[db_col[x]] = self.field_metadata[x]['datatype']
                is_multiple_cols[db_col[x]] = \
                    self.field_metadata[x]['is_multiple'].get('cache_to_list', None)

        try:
            # ratings are stored doubled (half-star resolution)
            rating_query = int(query) * 2
        except:
            rating_query = None

        location = [location] if location != 'all' else list(db_col.keys())

        for i, loc in enumerate(location):
            location[i] = db_col[loc]

        current_candidates = candidates.copy()
        for loc in location:  # location is now an array of field indices
            if loc == db_col['authors']:
                # DB stores authors with commas changed to bars, so change query
                if matchkind == REGEXP_MATCH:
                    q = query.replace(',', r'\|')
                else:
                    q = query.replace(',', '|')
            elif loc == db_col['languages']:
                q = canonicalize_lang(query)
                if q is None:
                    lm = lang_map()
                    rm = {v.lower():k for k,v in lm.iteritems()}
                    q = rm.get(query, query)
            else:
                q = query

            for id_ in current_candidates:
                item = self._data[id_]
                if item is None:
                    continue

                if not item[loc]:
                    if q == 'false' and matchkind == CONTAINS_MATCH:
                        matches.add(item[0])
                    continue    # item is empty. No possible matches below
                if q == 'false' and matchkind == CONTAINS_MATCH:
                    # Field has something in it, so a false query does not match
                    continue

                if q == 'true' and matchkind == CONTAINS_MATCH:
                    if isinstance(item[loc], basestring):
                        if item[loc].strip() == '':
                            continue
                    matches.add(item[0])
                    continue

                if col_datatype[loc] == 'rating':  # get here if 'all' query
                    if rating_query and rating_query == int(item[loc]):
                        matches.add(item[0])
                    continue

                try:  # a conversion below might fail
                    # relationals are not supported in 'all' queries
                    if col_datatype[loc] == 'float':
                        if float(query) == item[loc]:
                            matches.add(item[0])
                        continue
                    if col_datatype[loc] == 'int':
                        if int(query) == item[loc]:
                            matches.add(item[0])
                        continue
                except:
                    # A conversion threw an exception. Because of the type,
                    # no further match is possible
                    continue

                if loc not in exclude_fields:  # time for text matching
                    if is_multiple_cols[loc] is not None:
                        vals = [v.strip() for v in
                                item[loc].split(is_multiple_cols[loc])]
                    else:
                        vals = [item[loc]]  # make into list to make _match happy
                    if _match(q, vals, matchkind,
                              use_primary_find_in_search=pref_use_primary_find_in_search):
                        matches.add(item[0])
                        continue
            current_candidates -= matches
    return matches
def get_numeric_matches(self, location, query, candidates, val_func=None):
    '''
    Return the ids of the candidate records whose numeric field at
    *location* satisfies *query*.

    The query may be 'true'/'false' (field set / unset), or a number with
    an optional relational-operator prefix (from self.numeric_search_relops)
    and an optional k/m/g size-suffix multiplier.  When given, *val_func*
    extracts the value from a data row instead of the default column
    lookup.  Raises ParseException for non-numeric queries.
    '''
    matches = set()
    if not query:
        return matches
    if val_func is None:
        rec_idx = self.field_metadata[location]['rec_index']
        val_func = lambda item, loc=rec_idx: item[loc]
    q = ''
    cast = adjust = lambda x: x
    dt = self.field_metadata[location]['datatype']

    if query == 'false':
        # ratings and covers treat any falsy value as "unset"
        if dt == 'rating' or location == 'cover':
            relop = lambda x, y: not bool(x)
        else:
            relop = lambda x, y: x is None
    elif query == 'true':
        if dt == 'rating' or location == 'cover':
            relop = lambda x, y: bool(x)
        else:
            relop = lambda x, y: x is not None
    else:
        # strip a leading relational operator, defaulting to equality
        relop = None
        for op in self.numeric_search_relops.keys():
            if query.startswith(op):
                (width, relop) = self.numeric_search_relops[op]
                query = query[width:]
        if relop is None:
            (width, relop) = self.numeric_search_relops['=']

        if dt == 'int':
            cast = int
        elif dt == 'rating':
            # ratings are stored doubled (half-star resolution)
            cast = lambda x: 0 if x is None else int(x)
            adjust = lambda x: x/2
        elif dt in ('float', 'composite'):
            cast = float
        else:  # count operation
            cast = int

        if len(query) > 1:
            # optional k/m/g suffix scales the number (sizes)
            suffix = query[-1:].lower()
            mult = {'k': 1024., 'm': 1024.**2, 'g': 1024.**3}.get(suffix, 1.0)
            if mult != 1.0:
                query = query[:-1]
        else:
            mult = 1.0
        try:
            q = cast(query) * mult
        except:
            raise ParseException(_('Non-numeric value in query: {0}').format(query))

    for book_id in candidates:
        rec = self._data[book_id]
        if rec is None:
            continue
        try:
            v = cast(val_func(rec))
        except:
            v = None
        if v:
            v = adjust(v)
        if relop(v, q):
            matches.add(rec[0])
    return matches
def get_dates_matches(self, location, query, candidates):
    '''
    Return the ids of the candidate records whose date field at
    *location* matches *query*.

    Supported queries: 'true'/'false' (date present / absent), the
    localized today/yesterday/thismonth words, "N daysago", or an
    explicit date with an optional relational-operator prefix.  The
    number of date components given determines the match precision.
    '''
    matches = set()
    if len(query) < 2:
        return matches

    if location == 'date':
        location = 'timestamp'
    rec_idx = self.field_metadata[location]['rec_index']

    def as_date(value):
        # stored values may be ISO strings; normalize before comparing
        if isinstance(value, (str, unicode)):
            return parse_date(value)
        return value

    if query == 'false':
        for book_id in candidates:
            rec = self._data[book_id]
            if rec is None:
                continue
            d = as_date(rec[rec_idx])
            if d is None or d <= UNDEFINED_DATE:
                matches.add(rec[0])
        return matches

    if query == 'true':
        for book_id in candidates:
            rec = self._data[book_id]
            if rec is None:
                continue
            d = as_date(rec[rec_idx])
            if d is not None and d > UNDEFINED_DATE:
                matches.add(rec[0])
        return matches

    # strip a leading relational operator, defaulting to equality
    relop = None
    for op in self.date_search_relops.keys():
        if query.startswith(op):
            (width, relop) = self.date_search_relops[op]
            query = query[width:]
    if relop is None:
        (width, relop) = self.date_search_relops['=']

    if query in self.local_today:
        qd, field_count = now(), 3
    elif query in self.local_yesterday:
        qd, field_count = now() - timedelta(1), 3
    elif query in self.local_thismonth:
        qd, field_count = now(), 2
    elif query.endswith(self.local_daysago) or query.endswith(self.untrans_daysago):
        suffix_len = (self.local_daysago_len if query.endswith(self.local_daysago)
                      else self.untrans_daysago_len)
        num = query[0:-suffix_len]
        try:
            qd = now() - timedelta(int(num))
        except:
            raise ParseException(_('Number conversion error: {0}').format(num))
        field_count = 3
    else:
        try:
            qd = parse_date(query, as_utc=False)
        except:
            raise ParseException(_('Date conversion error: {0}').format(query))
        # the number of date separators determines the comparison precision
        if '-' in query:
            field_count = query.count('-') + 1
        else:
            field_count = query.count('/') + 1

    for book_id in candidates:
        rec = self._data[book_id]
        if rec is None or rec[rec_idx] is None:
            continue
        if relop(as_date(rec[rec_idx]), qd, field_count):
            matches.add(rec[0])
    return matches
def get_matches(self, location, query, candidates=None, allow_recursion=True):
    '''
    Return the set of book ids among *candidates* that match *query* in
    the given search *location* (a search-term name, 'all', or an
    @grouped-search-term).  Dispatches to the specialized date/numeric/
    bool/keypair searchers for typed fields, and falls back to text
    matching for everything else.

    If candidates is not None, it must not be modified. Changing its
    value will break query optimization in the search parser.
    '''
    matches = set()

    if candidates is None:
        candidates = self.all_book_ids
    if not candidates or not query or not query.strip():
        return matches
    if location not in self.all_search_locations:
        return matches

    if (len(location) > 2 and location.startswith('@') and
            location[1:] in self.grouped_search_terms):
        location = location[1:]

    # get metadata key associated with the search term. Eliminates
    # dealing with plurals and other aliases
    original_location = location
    location = self.field_metadata.search_term_to_field_key(
        icu_lower(location.strip()))

    # grouped search terms
    if isinstance(location, list):
        if allow_recursion:
            # 'false' against a group means "no member matches 'true'":
            # run the inverted query, then complement the result
            if query.lower() == 'false':
                invert = True
                query = 'true'
            else:
                invert = False
            for loc in location:
                pool = candidates.copy()
                sub = self.get_matches(loc, query,
                                       candidates=pool, allow_recursion=False)
                matches |= sub
                pool -= sub
                if len(pool) == 0:
                    break
            if invert:
                matches = self.all_book_ids - matches
            return matches
        raise ParseException(
            _('Recursive query group detected: {0}').format(query))

    # If the user has asked to restrict searching over all field, apply
    # that restriction
    if (location == 'all' and self.limit_search_columns and
            self.limit_search_columns_to):
        terms = set()
        for term in self.limit_search_columns_to:
            term = icu_lower(term.strip())
            if term and term != 'all' and term in self.all_search_locations:
                terms.add(term)
        if terms:
            pool = candidates.copy()
            for term in terms:
                try:
                    sub = self.get_matches(term, query, candidates=pool,
                                           allow_recursion=allow_recursion)
                    matches |= sub
                    pool -= sub
                    if len(pool) == 0:
                        break
                except:
                    # best-effort: one failing column must not kill the
                    # whole restricted search
                    pass
            return matches

    upf = prefs['use_primary_find_in_search']

    if location in self.field_metadata:
        fm = self.field_metadata[location]
        dt = fm['datatype']

        # take care of dates special case
        if (dt == 'datetime' or (
                dt == 'composite' and
                fm['display'].get('composite_sort', '') == 'date')):
            if location == 'date':
                location = 'timestamp'
            return self.date_search(
                icu_lower(query),
                partial(self.field_iter, location, candidates))

        # take care of numbers special case
        if (dt in ('rating', 'int', 'float') or (
                dt == 'composite' and
                fm['display'].get('composite_sort', '') == 'number')):
            if location == 'id':
                # ids have no field object; fake an iterator over them
                is_many = False

                def fi(default_value=None):
                    for qid in candidates:
                        yield qid, {qid}
            else:
                field = self.dbcache.fields[location]
                fi, is_many = partial(self.field_iter, location,
                                      candidates), field.is_many
            return self.num_search(icu_lower(query), fi, location, dt,
                                   candidates, is_many=is_many)

        # take care of the 'count' operator for is_multiples
        if (fm['is_multiple'] and len(query) > 1 and
                query[0] == '#' and query[1] in '=<>!'):
            return self.num_search(
                icu_lower(query[1:]),
                partial(self.dbcache.fields[location].iter_counts, candidates),
                location, dt, candidates)

        # take care of boolean special case
        if dt == 'bool':
            return self.bool_search(
                icu_lower(query),
                partial(self.field_iter, location, candidates),
                self.dbcache._pref('bools_are_tristate'))

        # special case: colon-separated fields such as identifiers. isbn
        # is a special case within the case
        if fm.get('is_csp', False):
            field_iter = partial(self.field_iter, location, candidates)
            if location == 'identifiers' and original_location == 'isbn':
                return self.keypair_search('=isbn:' + query, field_iter,
                                           candidates, upf)
            return self.keypair_search(query, field_iter, candidates, upf)

    # check for user categories
    if len(location) >= 2 and location.startswith('@'):
        return self.get_user_category_matches(location[1:], icu_lower(query),
                                              candidates)

    # Everything else (and 'all' matches)
    case_sensitive = prefs['case_sensitive']
    matchkind, query = _matchkind(query, case_sensitive=case_sensitive)
    all_locs = set()
    text_fields = set()
    field_metadata = {}

    for x, fm in self.field_metadata.iteritems():
        if x.startswith('@'):
            continue
        if fm['search_terms'] and x not in {'series_sort', 'id'}:
            if x not in self.virtual_fields and x != 'uuid':
                # We dont search virtual fields because if we do, search
                # caching will not be used
                all_locs.add(x)
            field_metadata[x] = fm
            if fm['datatype'] in {'composite', 'text', 'comments',
                                  'series', 'enumeration'}:
                text_fields.add(x)

    locations = all_locs if location == 'all' else {location}

    current_candidates = set(candidates)

    try:
        # ratings are stored doubled (half-star resolution)
        rating_query = int(float(query)) * 2
    except:
        rating_query = None

    try:
        int_query = int(float(query))
    except:
        int_query = None

    try:
        float_query = float(query)
    except:
        float_query = None

    for location in locations:
        current_candidates -= matches
        q = query
        if location == 'languages':
            q = canonicalize_lang(query)
            if q is None:
                lm = lang_map()
                rm = {v.lower(): k for k, v in lm.iteritems()}
                q = rm.get(query, query)

        if matchkind == CONTAINS_MATCH and q.lower() in {'true', 'false'}:
            # presence test: a field counts as set when it has a
            # non-blank value
            found = set()
            for val, book_ids in self.field_iter(location, current_candidates):
                if val and (not hasattr(val, 'strip') or val.strip()):
                    found |= book_ids
            matches |= (found if q.lower() == 'true'
                        else (current_candidates - found))
            continue

        dt = field_metadata.get(location, {}).get('datatype', None)
        if dt == 'rating':
            if rating_query is not None:
                for val, book_ids in self.field_iter(location,
                                                     current_candidates):
                    if val == rating_query:
                        matches |= book_ids
            continue

        if dt == 'float':
            if float_query is not None:
                for val, book_ids in self.field_iter(location,
                                                     current_candidates):
                    if val == float_query:
                        matches |= book_ids
            continue

        if dt == 'int':
            if int_query is not None:
                for val, book_ids in self.field_iter(location,
                                                     current_candidates):
                    if val == int_query:
                        matches |= book_ids
            continue

        if location in text_fields:
            for val, book_ids in self.field_iter(location, current_candidates):
                if val is not None:
                    if isinstance(val, basestring):
                        val = (val, )
                    if _match(q, val, matchkind,
                              use_primary_find_in_search=upf,
                              case_sensitive=case_sensitive):
                        matches |= book_ids

        if location == 'series_sort':
            book_lang_map = self.dbcache.fields['languages'].book_value_map
            for val, book_ids in self.dbcache.fields[
                    'series'].iter_searchable_values_for_sort(
                        current_candidates, book_lang_map):
                if val is not None:
                    if _match(q, (val, ), matchkind,
                              use_primary_find_in_search=upf,
                              case_sensitive=case_sensitive):
                        matches |= book_ids

    return matches
# NumericSearch.__call__: numeric field search over the (value, book_ids)
# pairs yielded by field_iter. Supports 'true'/'false' presence queries
# (with special handling for 'cover' and, when is_many is set, for
# many-valued fields where a rating counts as present only when > 0),
# relational-operator prefixes taken from self.operators (defaulting to
# '='), and k/m/g size-suffix multipliers for size-style queries.
# Ratings are stored doubled, hence the int-cast plus floor-halving
# adjustment. Raises ParseException when the value is not numeric.
# NOTE(review): this method was collapsed onto a single physical line;
# kept byte-identical because the exact nesting of the final
# adjust/relop steps cannot be recovered unambiguously from here —
# confirm against upstream before reformatting.
def __call__(self, query, field_iter, location, datatype, candidates, is_many=False): matches = set() if not query: return matches q = '' cast = adjust = lambda x: x dt = datatype if is_many and query in {'true', 'false'}: valcheck = lambda x: True if datatype == 'rating': valcheck = lambda x: x is not None and x > 0 found = set() for val, book_ids in field_iter(): if valcheck(val): found |= book_ids return found if query == 'true' else candidates - found if query == 'false': if location == 'cover': relop = lambda x, y: not bool(x) else: relop = lambda x, y: x is None elif query == 'true': if location == 'cover': relop = lambda x, y: bool(x) else: relop = lambda x, y: x is not None else: for k, relop in self.operators.iteritems(): if query.startswith(k): query = query[len(k):] break else: relop = self.operators['='] cast = int if dt == 'rating': cast = lambda x: 0 if x is None else int(x) adjust = lambda x: x // 2 elif dt in ('float', 'composite'): cast = float mult = 1.0 if len(query) > 1: mult = query[-1].lower() mult = { 'k': 1024., 'm': 1024.**2, 'g': 1024.**3 }.get(mult, 1.0) if mult != 1.0: query = query[:-1] else: mult = 1.0 try: q = cast(query) * mult except: raise ParseException( _('Non-numeric value in query: {0}').format(query)) qfalse = query == 'false' for val, book_ids in field_iter(): if val is None: if qfalse: matches |= book_ids continue try: v = cast(val) except: v = None if v: v = adjust(v) if relop(v, q): matches |= book_ids return matches
def __call__(self, query, field_iter):
    '''
    Date search over the (value, book_ids) pairs yielded by field_iter.

    Supported queries: 'true'/'false' (date present / absent), the
    localized today/yesterday/thismonth words, the "N daysago" pattern,
    or an explicit date with an optional relational-operator prefix.
    The number of date components given determines match precision.
    '''
    matches = set()
    if len(query) < 2:
        return matches

    def as_date(value):
        # stored values may be ISO strings; normalize before comparing
        if isinstance(value, (str, unicode)):
            return parse_date(value)
        return value

    if query in ('false', 'true'):
        want_defined = (query == 'true')
        for value, book_ids in field_iter():
            d = as_date(value)
            defined = d is not None and d > UNDEFINED_DATE
            if defined == want_defined:
                matches |= book_ids
        return matches

    # strip a leading relational operator, defaulting to equality
    for prefix, relop in self.operators.iteritems():
        if query.startswith(prefix):
            query = query[len(prefix):]
            break
    else:
        relop = self.operators['=']

    if query in self.local_today:
        qd, field_count = now(), 3
    elif query in self.local_yesterday:
        qd, field_count = now() - timedelta(1), 3
    elif query in self.local_thismonth:
        qd, field_count = now(), 2
    else:
        m = self.daysago_pat.search(query)
        if m is not None:
            num = query[:-len(m.group(1))]
            try:
                qd = now() - timedelta(int(num))
            except:
                raise ParseException(
                    _('Number conversion error: {0}').format(num))
            field_count = 3
        else:
            try:
                qd = parse_date(query, as_utc=False)
            except:
                raise ParseException(
                    _('Date conversion error: {0}').format(query))
            # the number of date separators determines comparison precision
            if '-' in query:
                field_count = query.count('-') + 1
            else:
                field_count = query.count('/') + 1

    for value, book_ids in field_iter():
        d = as_date(value)
        if d is not None and relop(dt_as_local(d), qd, field_count):
            matches |= book_ids
    return matches