Example #1
0
def get_lang_map():
    """Return the language map, lazily built once and cached in ``_lang_map``.

    The special-purpose ISO 639 codes ``zxx``, ``mis`` and ``mul`` are
    removed from the cached copy before it is returned.
    """
    global _lang_map
    if _lang_map is None:
        cleaned = lang_map().copy()
        for placeholder in ('zxx', 'mis', 'mul'):
            cleaned.pop(placeholder, None)
        _lang_map = cleaned
    return _lang_map
Example #2
0
 def read_id_maps(self, db):
     """Read the base id maps, then derive ``lang_name_map`` (code -> name).

     Unknown codes fall back to the code itself.
     """
     ManyToManyTable.read_id_maps(self, db)
     names = lang_map()
     self.lang_name_map = dict(
         (code, names.get(code, code))
         for code in self.id_map.itervalues())
Example #3
0
def get_lang_map():
    """Lazily initialise and return the cached copy of the language map.

    Special-purpose language codes are stripped from the cached copy.
    """
    global _lang_map
    if _lang_map is not None:
        return _lang_map
    _lang_map = lang_map().copy()
    for code in ('zxx', 'mis', 'mul'):
        _lang_map.pop(code, None)
    return _lang_map
Example #4
0
    def __init__(self, parent=None, db=None):
        """Initialise the language edit widget and precompute lookup maps."""
        EditWithComplete.__init__(self, parent)

        self.setSizeAdjustPolicy(self.AdjustToMinimumContentsLengthWithIcon)
        self.setMinimumContentsLength(20)
        self._lang_map = lang_map()
        # Track language names containing commas, plus bar-substituted
        # aliases in both directions so they can be round-tripped.
        self.names_with_commas = [
            name for name in self._lang_map.itervalues() if ',' in name]
        self.comma_map = {
            name: name.replace(',', '|') for name in self.names_with_commas}
        self.comma_rmap = {
            barred: name for name, barred in self.comma_map.iteritems()}
        # Reverse map from lower-cased language name to language code.
        self._rmap = {
            lower(name): code for code, name in self._lang_map.iteritems()}
        self.init_langs(db)
Example #5
0
    def __init__(self, parent=None, db=None):
        """Create the widget, configure sizing, and build the language maps."""
        EditWithComplete.__init__(self, parent)

        self.setSizeAdjustPolicy(self.AdjustToMinimumContentsLengthWithIcon)
        self.setMinimumContentsLength(20)
        self._lang_map = lang_map()
        # Names with embedded commas get a bar-substituted alias, kept in
        # forward and reverse maps for round-tripping.
        self.names_with_commas = [x for x in self._lang_map.itervalues() if ',' in x]
        self.comma_map = dict((n, n.replace(',', '|')) for n in self.names_with_commas)
        self.comma_rmap = dict((v, k) for k, v in self.comma_map.iteritems())
        self._rmap = dict((lower(v), k) for k, v in self._lang_map.iteritems())
        self.init_langs(db)
Example #6
0
    def get_matches(self, location, query, candidates=None,
            allow_recursion=True):
        '''
        Return the set of ids of books in ``candidates`` whose field named
        by ``location`` matches ``query``. ``allow_recursion`` guards
        against recursive grouped search terms.

        If candidates is not None, it must not be modified. Changing its
        value will break query optimization in the search parser.
        '''
        matches = set()
        if candidates is None:
            candidates = self.universal_set()
        if len(candidates) == 0:
            return matches
        if location not in self.all_search_locations:
            return matches

        if len(location) > 2 and location.startswith('@') and \
                    location[1:] in self.db_prefs['grouped_search_terms']:
            location = location[1:]

        if query and query.strip():
            # get metadata key associated with the search term. Eliminates
            # dealing with plurals and other aliases
            original_location = location
            location = self.field_metadata.search_term_to_field_key(icu_lower(location.strip()))
            # grouped search terms
            if isinstance(location, list):
                if allow_recursion:
                    if query.lower() == 'false':
                        invert = True
                        query = 'true'
                    else:
                        invert = False
                    for loc in location:
                        c = candidates.copy()
                        m = self.get_matches(loc, query,
                                candidates=c, allow_recursion=False)
                        matches |= m
                        c -= m
                        if len(c) == 0:
                            break
                    if invert:
                        matches = self.universal_set() - matches
                    return matches
                raise ParseException(_('Recursive query group detected: {0}').format(query))

            # apply the limit if appropriate
            if location == 'all' and prefs['limit_search_columns'] and \
                            prefs['limit_search_columns_to']:
                terms = set()
                for l in prefs['limit_search_columns_to']:
                    l = icu_lower(l.strip())
                    if l and l != 'all' and l in self.all_search_locations:
                        terms.add(l)
                if terms:
                    c = candidates.copy()
                    for l in terms:
                        try:
                            m = self.get_matches(l, query,
                                candidates=c, allow_recursion=allow_recursion)
                            matches |= m
                            c -= m
                            if len(c) == 0:
                                break
                        except:
                            pass
                    return matches

            if location in self.field_metadata:
                fm = self.field_metadata[location]
                # take care of dates special case
                if fm['datatype'] == 'datetime' or \
                        (fm['datatype'] == 'composite' and
                         fm['display'].get('composite_sort', '') == 'date'):
                    return self.get_dates_matches(location, query.lower(), candidates)

                # take care of numbers special case
                if fm['datatype'] in ('rating', 'int', 'float') or \
                        (fm['datatype'] == 'composite' and
                         fm['display'].get('composite_sort', '') == 'number'):
                    return self.get_numeric_matches(location, query.lower(), candidates)

                if fm['datatype'] == 'bool':
                    return self.get_bool_matches(location, query, candidates)

                # take care of the 'count' operator for is_multiples.
                # BUG FIX: this previously tested query[1:1], which is always
                # the empty string, and '' in '=<>!' is True -- so the branch
                # fired for ANY '#'-prefixed query. Test the actual second
                # character, as the guard len(query) > 1 guarantees it exists.
                if fm['is_multiple'] and \
                        len(query) > 1 and query.startswith('#') and \
                        query[1] in '=<>!':
                    vf = lambda item, loc=fm['rec_index'], \
                                ms=fm['is_multiple']['cache_to_list']:\
                            len(item[loc].split(ms)) if item[loc] is not None else 0
                    return self.get_numeric_matches(location, query[1:],
                                                    candidates, val_func=vf)

                # special case: colon-separated fields such as identifiers. isbn
                # is a special case within the case
                if fm.get('is_csp', False):
                    if location == 'identifiers' and original_location == 'isbn':
                        return self.get_keypair_matches('identifiers',
                                                   '=isbn:'+query, candidates)
                    return self.get_keypair_matches(location, query, candidates)

            # check for user categories
            if len(location) >= 2 and location.startswith('@'):
                return self.get_user_category_matches(location[1:], query.lower(),
                                                      candidates)
            # everything else, or 'all' matches
            matchkind, query = self._matchkind(query)

            if not isinstance(query, unicode):
                query = query.decode('utf-8')

            db_col = {}
            exclude_fields = []  # fields to not check when matching against text.
            col_datatype = []
            is_multiple_cols = {}
            for x in range(len(self.FIELD_MAP)):
                col_datatype.append('')
            for x in self.field_metadata:
                if x.startswith('@'):
                    continue
                if len(self.field_metadata[x]['search_terms']):
                    db_col[x] = self.field_metadata[x]['rec_index']
                    if self.field_metadata[x]['datatype'] not in \
                            ['composite', 'text', 'comments', 'series', 'enumeration']:
                        exclude_fields.append(db_col[x])
                    col_datatype[db_col[x]] = self.field_metadata[x]['datatype']
                    is_multiple_cols[db_col[x]] = \
                        self.field_metadata[x]['is_multiple'].get('cache_to_list', None)

            try:
                rating_query = int(query) * 2
            except:
                rating_query = None

            location = [location] if location != 'all' else list(db_col.keys())
            for i, loc in enumerate(location):
                location[i] = db_col[loc]

            current_candidates = candidates.copy()
            for loc in location:  # location is now an array of field indices
                if loc == db_col['authors']:
                    # DB stores authors with commas changed to bars, so change query
                    if matchkind == REGEXP_MATCH:
                        q = query.replace(',', r'\|')
                    else:
                        q = query.replace(',', '|')
                elif loc == db_col['languages']:
                    q = canonicalize_lang(query)
                    if q is None:
                        lm = lang_map()
                        rm = {v.lower():k for k,v in lm.iteritems()}
                        q = rm.get(query, query)
                else:
                    q = query

                for id_ in current_candidates:
                    item = self._data[id_]
                    if item is None:
                        continue

                    if not item[loc]:
                        if q == 'false' and matchkind == CONTAINS_MATCH:
                            matches.add(item[0])
                        continue     # item is empty. No possible matches below
                    if q == 'false' and matchkind == CONTAINS_MATCH:
                        # Field has something in it, so a false query does not match
                        continue

                    if q == 'true' and matchkind == CONTAINS_MATCH:
                        if isinstance(item[loc], basestring):
                            if item[loc].strip() == '':
                                continue
                        matches.add(item[0])
                        continue

                    if col_datatype[loc] == 'rating':  # get here if 'all' query
                        if rating_query and rating_query == int(item[loc]):
                            matches.add(item[0])
                        continue

                    try:  # a conversion below might fail
                        # relationals are not supported in 'all' queries
                        if col_datatype[loc] == 'float':
                            if float(query) == item[loc]:
                                matches.add(item[0])
                            continue
                        if col_datatype[loc] == 'int':
                            if int(query) == item[loc]:
                                matches.add(item[0])
                            continue
                    except:
                        # A conversion threw an exception. Because of the type,
                        # no further match is possible
                        continue

                    if loc not in exclude_fields:  # time for text matching
                        if is_multiple_cols[loc] is not None:
                            vals = [v.strip() for v in item[loc].split(is_multiple_cols[loc])]
                        else:
                            vals = [item[loc]]  # make into list to make _match happy
                        if _match(q, vals, matchkind,
                                  use_primary_find_in_search=pref_use_primary_find_in_search):
                            matches.add(item[0])
                            continue
                current_candidates -= matches
        return matches
Example #7
0
    def get_matches(self, location, query, candidates=None,
                    allow_recursion=True):
        '''
        Return the set of book ids from ``candidates`` whose field named by
        ``location`` matches ``query``. Dispatches to specialised searchers
        for dates, numbers, booleans and keypair (identifier) fields, and
        falls through to generic text matching otherwise.

        :param allow_recursion: guards against recursive grouped search terms.
        '''
        # If candidates is not None, it must not be modified. Changing its
        # value will break query optimization in the search parser
        matches = set()

        if candidates is None:
            candidates = self.all_book_ids
        if not candidates or not query or not query.strip():
            return matches
        if location not in self.all_search_locations:
            return matches

        # Strip the leading '@' from grouped search term names.
        if (len(location) > 2 and location.startswith('@') and
                    location[1:] in self.grouped_search_terms):
            location = location[1:]

        # get metadata key associated with the search term. Eliminates
        # dealing with plurals and other aliases
        original_location = location
        location = self.field_metadata.search_term_to_field_key(
            icu_lower(location.strip()))
        # grouped search terms
        if isinstance(location, list):
            if allow_recursion:
                if query.lower() == 'false':
                    invert = True
                    query = 'true'
                else:
                    invert = False
                for loc in location:
                    c = candidates.copy()
                    m = self.get_matches(loc, query,
                            candidates=c, allow_recursion=False)
                    matches |= m
                    c -= m
                    if len(c) == 0:
                        break
                if invert:
                    matches = self.all_book_ids - matches
                return matches
            raise ParseException(
                       _('Recursive query group detected: {0}').format(query))

        # If the user has asked to restrict searching over all field, apply
        # that restriction
        if (location == 'all' and self.limit_search_columns and
            self.limit_search_columns_to):
            terms = set()
            for l in self.limit_search_columns_to:
                l = icu_lower(l.strip())
                if l and l != 'all' and l in self.all_search_locations:
                    terms.add(l)
            if terms:
                c = candidates.copy()
                for l in terms:
                    try:
                        m = self.get_matches(l, query,
                            candidates=c, allow_recursion=allow_recursion)
                        matches |= m
                        c -= m
                        if len(c) == 0:
                            break
                    except:
                        pass
                return matches

        upf = prefs['use_primary_find_in_search']

        if location in self.field_metadata:
            fm = self.field_metadata[location]
            dt = fm['datatype']

            # take care of dates special case
            if (dt == 'datetime' or (
                dt == 'composite' and
                fm['display'].get('composite_sort', '') == 'date')):
                if location == 'date':
                    location = 'timestamp'
                return self.date_search(
                    icu_lower(query), partial(self.field_iter, location, candidates))

            # take care of numbers special case
            if (dt in ('rating', 'int', 'float') or
                    (dt == 'composite' and
                     fm['display'].get('composite_sort', '') == 'number')):
                field = self.dbcache.fields[location]
                return self.num_search(
                    icu_lower(query), partial(self.field_iter, location, candidates),
                    location, dt, candidates, is_many=field.is_many)

            # take care of the 'count' operator for is_multiples
            if (fm['is_multiple'] and
                len(query) > 1 and query[0] == '#' and query[1] in '=<>!'):
                return self.num_search(icu_lower(query[1:]), partial(
                        self.dbcache.fields[location].iter_counts, candidates),
                    location, dt, candidates)

            # take care of boolean special case
            if dt == 'bool':
                return self.bool_search(icu_lower(query),
                                partial(self.field_iter, location, candidates),
                                self.dbcache._pref('bools_are_tristate'))

            # special case: colon-separated fields such as identifiers. isbn
            # is a special case within the case
            if fm.get('is_csp', False):
                field_iter = partial(self.field_iter, location, candidates)
                if location == 'identifiers' and original_location == 'isbn':
                    return self.keypair_search('=isbn:'+query, field_iter,
                                        candidates, upf)
                return self.keypair_search(query, field_iter, candidates, upf)

        # check for user categories
        if len(location) >= 2 and location.startswith('@'):
            return self.get_user_category_matches(location[1:], icu_lower(query), candidates)

        # Everything else (and 'all' matches)
        matchkind, query = _matchkind(query)
        all_locs = set()
        text_fields = set()
        field_metadata = {}

        for x, fm in self.field_metadata.iteritems():
            if x.startswith('@'):
                continue
            if fm['search_terms'] and x != 'series_sort':
                if x not in self.virtual_fields:
                    # We dont search virtual fields because if we do, search
                    # caching will not be used
                    all_locs.add(x)
                field_metadata[x] = fm
                if fm['datatype'] in {'composite', 'text', 'comments', 'series', 'enumeration'}:
                    text_fields.add(x)

        locations = all_locs if location == 'all' else {location}

        current_candidates = set(candidates)

        # Precompute numeric interpretations of the query once; None means
        # the query is not interpretable as that numeric type.
        try:
            rating_query = int(float(query)) * 2
        except:
            rating_query = None

        try:
            int_query = int(float(query))
        except:
            int_query = None

        try:
            float_query = float(query)
        except:
            float_query = None

        for location in locations:
            # Books already matched need not be re-examined for other fields.
            current_candidates -= matches
            q = query
            if location == 'languages':
                q = canonicalize_lang(query)
                if q is None:
                    lm = lang_map()
                    rm = {v.lower():k for k,v in lm.iteritems()}
                    q = rm.get(query, query)

            # NOTE(review): q is compared to 'true'/'false' without
            # lowercasing here; confirm queries are normalized upstream.
            if matchkind == CONTAINS_MATCH and q in {'true', 'false'}:
                found = set()
                for val, book_ids in self.field_iter(location, current_candidates):
                    if val and (not hasattr(val, 'strip') or val.strip()):
                        found |= book_ids
                matches |= (found if q == 'true' else (current_candidates-found))
                continue

            dt = field_metadata.get(location, {}).get('datatype', None)
            if dt == 'rating':
                if rating_query is not None:
                    for val, book_ids in self.field_iter(location, current_candidates):
                        if val == rating_query:
                            matches |= book_ids
                continue

            if dt == 'float':
                if float_query is not None:
                    for val, book_ids in self.field_iter(location, current_candidates):
                        if val == float_query:
                            matches |= book_ids
                continue

            if dt == 'int':
                if int_query is not None:
                    for val, book_ids in self.field_iter(location, current_candidates):
                        if val == int_query:
                            matches |= book_ids
                continue

            if location in text_fields:
                for val, book_ids in self.field_iter(location, current_candidates):
                    if val is not None:
                        if isinstance(val, basestring):
                            val = (val,)
                        if _match(q, val, matchkind, use_primary_find_in_search=upf):
                            matches |= book_ids

        return matches
Example #8
0
 def current_val(self):
     """Return the edited text; for languages, map a name back to its code."""
     ans = unicode_type(self.value_box.text()).strip()
     if self.current_col == 'languages':
         name_to_code = dict(
             (lower(name), code) for code, name in lang_map().iteritems())
         ans = name_to_code.get(lower(ans), ans)
     return ans
Example #9
0
 def read_id_maps(self, db):
     """Read the base id maps, then map each language code to its name."""
     ManyToManyTable.read_id_maps(self, db)
     lm = lang_map()
     self.lang_name_map = {}
     for code in self.id_map.itervalues():
         # Fall back to the raw code for unknown languages.
         self.lang_name_map[code] = lm.get(code, code)
Example #10
0
 def current_val(self):
     """Return the stripped text; translate a language name to its code."""
     ans = unicode(self.value_box.text()).strip()
     if self.current_col == 'languages':
         reverse = {}
         for code, name in lang_map().iteritems():
             reverse[lower(name)] = code
         ans = reverse.get(lower(ans), ans)
     return ans
Example #11
0
    def get_matches(self,
                    location,
                    query,
                    candidates=None,
                    allow_recursion=True):
        '''
        Return the set of book ids from ``candidates`` whose field named by
        ``location`` matches ``query``. Dispatches to specialised searchers
        for dates, numbers, booleans and keypair (identifier) fields, then
        falls through to generic (optionally case-sensitive) text matching.

        :param allow_recursion: guards against recursive grouped search terms.
        '''
        # If candidates is not None, it must not be modified. Changing its
        # value will break query optimization in the search parser
        matches = set()

        if candidates is None:
            candidates = self.all_book_ids
        if not candidates or not query or not query.strip():
            return matches
        if location not in self.all_search_locations:
            return matches

        # Strip the leading '@' from grouped search term names.
        if (len(location) > 2 and location.startswith('@')
                and location[1:] in self.grouped_search_terms):
            location = location[1:]

        # get metadata key associated with the search term. Eliminates
        # dealing with plurals and other aliases
        original_location = location
        location = self.field_metadata.search_term_to_field_key(
            icu_lower(location.strip()))
        # grouped search terms
        if isinstance(location, list):
            if allow_recursion:
                if query.lower() == 'false':
                    invert = True
                    query = 'true'
                else:
                    invert = False
                for loc in location:
                    c = candidates.copy()
                    m = self.get_matches(loc,
                                         query,
                                         candidates=c,
                                         allow_recursion=False)
                    matches |= m
                    c -= m
                    if len(c) == 0:
                        break
                if invert:
                    matches = self.all_book_ids - matches
                return matches
            raise ParseException(
                _('Recursive query group detected: {0}').format(query))

        # If the user has asked to restrict searching over all field, apply
        # that restriction
        if (location == 'all' and self.limit_search_columns
                and self.limit_search_columns_to):
            terms = set()
            for l in self.limit_search_columns_to:
                l = icu_lower(l.strip())
                if l and l != 'all' and l in self.all_search_locations:
                    terms.add(l)
            if terms:
                c = candidates.copy()
                for l in terms:
                    try:
                        m = self.get_matches(l,
                                             query,
                                             candidates=c,
                                             allow_recursion=allow_recursion)
                        matches |= m
                        c -= m
                        if len(c) == 0:
                            break
                    except:
                        pass
                return matches

        upf = prefs['use_primary_find_in_search']

        if location in self.field_metadata:
            fm = self.field_metadata[location]
            dt = fm['datatype']

            # take care of dates special case
            if (dt == 'datetime' or
                (dt == 'composite'
                 and fm['display'].get('composite_sort', '') == 'date')):
                if location == 'date':
                    location = 'timestamp'
                return self.date_search(
                    icu_lower(query),
                    partial(self.field_iter, location, candidates))

            # take care of numbers special case
            if (dt in ('rating', 'int', 'float') or
                (dt == 'composite'
                 and fm['display'].get('composite_sort', '') == 'number')):
                if location == 'id':
                    is_many = False

                    # The id field has no backing Field object; synthesize
                    # an iterator yielding (id, {id}) pairs instead.
                    def fi(default_value=None):
                        for qid in candidates:
                            yield qid, {qid}
                else:
                    field = self.dbcache.fields[location]
                    fi, is_many = partial(self.field_iter, location,
                                          candidates), field.is_many
                return self.num_search(icu_lower(query),
                                       fi,
                                       location,
                                       dt,
                                       candidates,
                                       is_many=is_many)

            # take care of the 'count' operator for is_multiples
            if (fm['is_multiple'] and len(query) > 1 and query[0] == '#'
                    and query[1] in '=<>!'):
                return self.num_search(
                    icu_lower(query[1:]),
                    partial(self.dbcache.fields[location].iter_counts,
                            candidates), location, dt, candidates)

            # take care of boolean special case
            if dt == 'bool':
                return self.bool_search(
                    icu_lower(query),
                    partial(self.field_iter, location, candidates),
                    self.dbcache._pref('bools_are_tristate'))

            # special case: colon-separated fields such as identifiers. isbn
            # is a special case within the case
            if fm.get('is_csp', False):
                field_iter = partial(self.field_iter, location, candidates)
                if location == 'identifiers' and original_location == 'isbn':
                    return self.keypair_search('=isbn:' + query, field_iter,
                                               candidates, upf)
                return self.keypair_search(query, field_iter, candidates, upf)

        # check for user categories
        if len(location) >= 2 and location.startswith('@'):
            return self.get_user_category_matches(location[1:],
                                                  icu_lower(query), candidates)

        # Everything else (and 'all' matches)
        case_sensitive = prefs['case_sensitive']
        matchkind, query = _matchkind(query, case_sensitive=case_sensitive)
        all_locs = set()
        text_fields = set()
        field_metadata = {}

        for x, fm in self.field_metadata.iteritems():
            if x.startswith('@'):
                continue
            if fm['search_terms'] and x not in {'series_sort', 'id'}:
                if x not in self.virtual_fields and x != 'uuid':
                    # We dont search virtual fields because if we do, search
                    # caching will not be used
                    all_locs.add(x)
                field_metadata[x] = fm
                if fm['datatype'] in {
                        'composite', 'text', 'comments', 'series',
                        'enumeration'
                }:
                    text_fields.add(x)

        locations = all_locs if location == 'all' else {location}

        current_candidates = set(candidates)

        # Precompute numeric interpretations of the query once; None means
        # the query is not interpretable as that numeric type.
        try:
            rating_query = int(float(query)) * 2
        except:
            rating_query = None

        try:
            int_query = int(float(query))
        except:
            int_query = None

        try:
            float_query = float(query)
        except:
            float_query = None

        for location in locations:
            # Books already matched need not be re-examined for other fields.
            current_candidates -= matches
            q = query
            if location == 'languages':
                q = canonicalize_lang(query)
                if q is None:
                    lm = lang_map()
                    rm = {v.lower(): k for k, v in lm.iteritems()}
                    q = rm.get(query, query)

            if matchkind == CONTAINS_MATCH and q.lower() in {'true', 'false'}:
                found = set()
                for val, book_ids in self.field_iter(location,
                                                     current_candidates):
                    if val and (not hasattr(val, 'strip') or val.strip()):
                        found |= book_ids
                matches |= (found if q.lower() == 'true' else
                            (current_candidates - found))
                continue

            dt = field_metadata.get(location, {}).get('datatype', None)
            if dt == 'rating':
                if rating_query is not None:
                    for val, book_ids in self.field_iter(
                            location, current_candidates):
                        if val == rating_query:
                            matches |= book_ids
                continue

            if dt == 'float':
                if float_query is not None:
                    for val, book_ids in self.field_iter(
                            location, current_candidates):
                        if val == float_query:
                            matches |= book_ids
                continue

            if dt == 'int':
                if int_query is not None:
                    for val, book_ids in self.field_iter(
                            location, current_candidates):
                        if val == int_query:
                            matches |= book_ids
                continue

            if location in text_fields:
                for val, book_ids in self.field_iter(location,
                                                     current_candidates):
                    if val is not None:
                        if isinstance(val, basestring):
                            val = (val, )
                        if _match(q,
                                  val,
                                  matchkind,
                                  use_primary_find_in_search=upf,
                                  case_sensitive=case_sensitive):
                            matches |= book_ids

            # series_sort is searched via the series field's language-aware
            # sort values rather than a stored column.
            if location == 'series_sort':
                book_lang_map = self.dbcache.fields['languages'].book_value_map
                for val, book_ids in self.dbcache.fields[
                        'series'].iter_searchable_values_for_sort(
                            current_candidates, book_lang_map):
                    if val is not None:
                        if _match(q, (val, ),
                                  matchkind,
                                  use_primary_find_in_search=upf,
                                  case_sensitive=case_sensitive):
                            matches |= book_ids

        return matches
Example #12
0
    def get_matches(self, location, query, candidates=None, allow_recursion=True):
        """Return the set of book ids from ``candidates`` that match
        ``query`` in the search field named by ``location``.

        ``location`` may be a field/search-term name, a grouped search
        term prefixed with ``@``, or ``'all'``. ``query`` may carry a
        match-kind prefix that ``_matchkind()`` strips. ``candidates``,
        when given, is never modified (it is copied before use); ``None``
        means search over all books. ``allow_recursion`` guards against
        infinite recursion while expanding grouped search terms; a
        recursive group raises ``ParseException``.
        """
        # If candidates is not None, it must not be modified. Changing its
        # value will break query optimization in the search parser
        matches = set()

        if candidates is None:
            candidates = self.all_book_ids
        # An empty/blank query or an unknown search location matches nothing.
        if not candidates or not query or not query.strip():
            return matches
        if location not in self.all_search_locations:
            return matches

        # Strip the leading '@' from a grouped search term so it can be
        # looked up by its bare name below.
        if len(location) > 2 and location.startswith("@") and location[1:] in self.grouped_search_terms:
            location = location[1:]

        # get metadata key associated with the search term. Eliminates
        # dealing with plurals and other aliases
        original_location = location
        location = self.field_metadata.search_term_to_field_key(icu_lower(location.strip()))
        # grouped search terms
        if isinstance(location, list):
            if allow_recursion:
                # 'false' on a group means: books matching none of the
                # grouped locations. Search for 'true' and invert at the end.
                if query.lower() == "false":
                    invert = True
                    query = "true"
                else:
                    invert = False
                for loc in location:
                    c = candidates.copy()
                    m = self.get_matches(loc, query, candidates=c, allow_recursion=False)
                    matches |= m
                    # Books already matched need not be searched again in
                    # the remaining grouped locations.
                    c -= m
                    if len(c) == 0:
                        break
                if invert:
                    matches = self.all_book_ids - matches
                return matches
            raise ParseException(_("Recursive query group detected: {0}").format(query))

        # If the user has asked to restrict searching over all field, apply
        # that restriction
        if location == "all" and self.limit_search_columns and self.limit_search_columns_to:
            terms = set()
            for l in self.limit_search_columns_to:
                l = icu_lower(l.strip())
                if l and l != "all" and l in self.all_search_locations:
                    terms.add(l)
            if terms:
                c = candidates.copy()
                for l in terms:
                    try:
                        m = self.get_matches(l, query, candidates=c, allow_recursion=allow_recursion)
                        matches |= m
                        c -= m
                        if len(c) == 0:
                            break
                    except:
                        # Best effort: a failure searching one column must
                        # not abort the search of the others.
                        pass
                return matches

        upf = prefs["use_primary_find_in_search"]

        if location in self.field_metadata:
            fm = self.field_metadata[location]
            dt = fm["datatype"]

            # take care of dates special case
            if dt == "datetime" or (dt == "composite" and fm["display"].get("composite_sort", "") == "date"):
                if location == "date":
                    location = "timestamp"
                return self.date_search(icu_lower(query), partial(self.field_iter, location, candidates))

            # take care of numbers special case
            if dt in ("rating", "int", "float") or (
                dt == "composite" and fm["display"].get("composite_sort", "") == "number"
            ):
                if location == "id":
                    is_many = False

                    # The id field has no backing store: synthesize an
                    # iterator that yields each candidate id as its own value.
                    def fi(default_value=None):
                        for qid in candidates:
                            yield qid, {qid}

                else:
                    field = self.dbcache.fields[location]
                    fi, is_many = partial(self.field_iter, location, candidates), field.is_many
                return self.num_search(icu_lower(query), fi, location, dt, candidates, is_many=is_many)

            # take care of the 'count' operator for is_multiples
            if fm["is_multiple"] and len(query) > 1 and query[0] == "#" and query[1] in "=<>!":
                return self.num_search(
                    icu_lower(query[1:]),
                    partial(self.dbcache.fields[location].iter_counts, candidates),
                    location,
                    dt,
                    candidates,
                )

            # take care of boolean special case
            if dt == "bool":
                return self.bool_search(
                    icu_lower(query),
                    partial(self.field_iter, location, candidates),
                    self.dbcache._pref("bools_are_tristate"),
                )

            # special case: colon-separated fields such as identifiers. isbn
            # is a special case within the case
            if fm.get("is_csp", False):
                field_iter = partial(self.field_iter, location, candidates)
                if location == "identifiers" and original_location == "isbn":
                    return self.keypair_search("=isbn:" + query, field_iter, candidates, upf)
                return self.keypair_search(query, field_iter, candidates, upf)

        # check for user categories
        if len(location) >= 2 and location.startswith("@"):
            return self.get_user_category_matches(location[1:], icu_lower(query), candidates)

        # Everything else (and 'all' matches)
        matchkind, query = _matchkind(query)
        all_locs = set()
        text_fields = set()
        field_metadata = {}

        for x, fm in self.field_metadata.iteritems():
            if x.startswith("@"):
                continue
            if fm["search_terms"] and x not in {"series_sort", "id"}:
                if x not in self.virtual_fields and x != "uuid":
                    # We don't search virtual fields because if we do, search
                    # caching will not be used
                    all_locs.add(x)
                field_metadata[x] = fm
                if fm["datatype"] in {"composite", "text", "comments", "series", "enumeration"}:
                    text_fields.add(x)

        locations = all_locs if location == "all" else {location}

        current_candidates = set(candidates)

        # Pre-parse the query as the various numeric types once, outside
        # the per-location loop; None means the query is not of that type.
        try:
            rating_query = int(float(query)) * 2
        except:
            rating_query = None

        try:
            int_query = int(float(query))
        except:
            int_query = None

        try:
            float_query = float(query)
        except:
            float_query = None

        for location in locations:
            # Books already matched need not be examined again for the
            # remaining locations.
            current_candidates -= matches
            q = query
            if location == "languages":
                # Accept either a language code or a localized language
                # name; map a name back to its code via lang_map().
                q = canonicalize_lang(query)
                if q is None:
                    lm = lang_map()
                    rm = {v.lower(): k for k, v in lm.iteritems()}
                    q = rm.get(query, query)

            # 'true'/'false' queries match on presence/absence of a
            # non-empty value in the field.
            if matchkind == CONTAINS_MATCH and q in {"true", "false"}:
                found = set()
                for val, book_ids in self.field_iter(location, current_candidates):
                    if val and (not hasattr(val, "strip") or val.strip()):
                        found |= book_ids
                matches |= found if q == "true" else (current_candidates - found)
                continue

            dt = field_metadata.get(location, {}).get("datatype", None)
            if dt == "rating":
                # The query value was doubled above (rating_query) before
                # being compared against stored rating values.
                if rating_query is not None:
                    for val, book_ids in self.field_iter(location, current_candidates):
                        if val == rating_query:
                            matches |= book_ids
                continue

            if dt == "float":
                if float_query is not None:
                    for val, book_ids in self.field_iter(location, current_candidates):
                        if val == float_query:
                            matches |= book_ids
                continue

            if dt == "int":
                if int_query is not None:
                    for val, book_ids in self.field_iter(location, current_candidates):
                        if val == int_query:
                            matches |= book_ids
                continue

            if location in text_fields:
                for val, book_ids in self.field_iter(location, current_candidates):
                    if val is not None:
                        # _match expects a sequence of values.
                        if isinstance(val, basestring):
                            val = (val,)
                        if _match(q, val, matchkind, use_primary_find_in_search=upf):
                            matches |= book_ids

            if location == "series_sort":
                # series_sort is computed per-book from the series value
                # and the book's language, hence the special iterator.
                book_lang_map = self.dbcache.fields["languages"].book_value_map
                for val, book_ids in self.dbcache.fields["series"].iter_searchable_values_for_sort(
                    current_candidates, book_lang_map
                ):
                    if val is not None:
                        if _match(q, (val,), matchkind, use_primary_find_in_search=upf):
                            matches |= book_ids

        return matches
Example #13
0
    def get_matches(self, location, query, candidates=None, allow_recursion=True):
        """Return the set of book ids from ``candidates`` that match
        ``query`` in the search field named by ``location``.

        ``location`` may be a field/search-term name, a grouped search
        term prefixed with ``@``, or ``'all'``. ``query`` may carry a
        match-kind prefix that ``self._matchkind()`` strips.
        ``candidates``, when given, is never modified (it is copied
        before use); ``None`` means search over the universal set of
        books. ``allow_recursion`` guards against infinite recursion
        while expanding grouped search terms; a recursive group raises
        ``ParseException``.
        """
        # If candidates is not None, it must not be modified. Changing its
        # value will break query optimization in the search parser
        matches = set([])
        if candidates is None:
            candidates = self.universal_set()
        if len(candidates) == 0:
            return matches
        if location not in self.all_search_locations:
            return matches

        # Strip the leading '@' from a grouped search term so it can be
        # looked up by its bare name below.
        if len(location) > 2 and location.startswith("@") and location[1:] in self.db_prefs["grouped_search_terms"]:
            location = location[1:]

        if query and query.strip():
            # get metadata key associated with the search term. Eliminates
            # dealing with plurals and other aliases
            original_location = location
            location = self.field_metadata.search_term_to_field_key(icu_lower(location.strip()))
            # grouped search terms
            if isinstance(location, list):
                if allow_recursion:
                    # 'false' on a group means: books matching none of the
                    # grouped locations. Search 'true' and invert at the end.
                    if query.lower() == "false":
                        invert = True
                        query = "true"
                    else:
                        invert = False
                    for loc in location:
                        c = candidates.copy()
                        m = self.get_matches(loc, query, candidates=c, allow_recursion=False)
                        matches |= m
                        # Already-matched books need not be searched again.
                        c -= m
                        if len(c) == 0:
                            break
                    if invert:
                        matches = self.universal_set() - matches
                    return matches
                raise ParseException(_("Recursive query group detected: {0}").format(query))

            # apply the limit if appropriate
            if location == "all" and prefs["limit_search_columns"] and prefs["limit_search_columns_to"]:
                terms = set([])
                for l in prefs["limit_search_columns_to"]:
                    l = icu_lower(l.strip())
                    if l and l != "all" and l in self.all_search_locations:
                        terms.add(l)
                if terms:
                    c = candidates.copy()
                    for l in terms:
                        try:
                            m = self.get_matches(l, query, candidates=c, allow_recursion=allow_recursion)
                            matches |= m
                            c -= m
                            if len(c) == 0:
                                break
                        except Exception:
                            # Best effort: a failure searching one column
                            # must not abort the search of the others.
                            pass
                    return matches

            if location in self.field_metadata:
                fm = self.field_metadata[location]
                # take care of dates special case
                if fm["datatype"] == "datetime" or (
                    fm["datatype"] == "composite" and fm["display"].get("composite_sort", "") == "date"
                ):
                    return self.get_dates_matches(location, query.lower(), candidates)

                # take care of numbers special case
                if fm["datatype"] in ("rating", "int", "float") or (
                    fm["datatype"] == "composite" and fm["display"].get("composite_sort", "") == "number"
                ):
                    return self.get_numeric_matches(location, query.lower(), candidates)

                if fm["datatype"] == "bool":
                    return self.get_bool_matches(location, query, candidates)

                # take care of the 'count' operator for is_multiples.
                # BUG FIX: this used to test query[1:1], which is always the
                # empty string, and '' is a substring of every string, so
                # EVERY '#'-prefixed query on a multi-valued field was
                # misrouted to the numeric count search. Test the actual
                # second character (safe: len(query) > 1 is checked first).
                if fm["is_multiple"] and len(query) > 1 and query.startswith("#") and query[1] in "=<>!":
                    # Count the number of items in the multi-valued field
                    # (0 when the field is empty).
                    vf = (
                        lambda item, loc=fm["rec_index"], ms=fm["is_multiple"]["cache_to_list"]: len(
                            item[loc].split(ms)
                        )
                        if item[loc] is not None
                        else 0
                    )
                    return self.get_numeric_matches(location, query[1:], candidates, val_func=vf)

                # special case: colon-separated fields such as identifiers. isbn
                # is a special case within the case
                if fm.get("is_csp", False):
                    if location == "identifiers" and original_location == "isbn":
                        return self.get_keypair_matches("identifiers", "=isbn:" + query, candidates)
                    return self.get_keypair_matches(location, query, candidates)

            # check for user categories
            if len(location) >= 2 and location.startswith("@"):
                return self.get_user_category_matches(location[1:], query.lower(), candidates)
            # everything else, or 'all' matches
            matchkind, query = self._matchkind(query)

            if not isinstance(query, unicode):
                query = query.decode("utf-8")

            db_col = {}
            exclude_fields = []  # fields to not check when matching against text.
            col_datatype = []
            is_multiple_cols = {}
            for x in range(len(self.FIELD_MAP)):
                col_datatype.append("")
            for x in self.field_metadata:
                if x.startswith("@"):
                    continue
                if len(self.field_metadata[x]["search_terms"]):
                    db_col[x] = self.field_metadata[x]["rec_index"]
                    if self.field_metadata[x]["datatype"] not in [
                        "composite",
                        "text",
                        "comments",
                        "series",
                        "enumeration",
                    ]:
                        exclude_fields.append(db_col[x])
                    col_datatype[db_col[x]] = self.field_metadata[x]["datatype"]
                    is_multiple_cols[db_col[x]] = self.field_metadata[x]["is_multiple"].get("cache_to_list", None)

            # The query value is doubled before comparison against stored
            # rating values; None means the query is not numeric.
            try:
                rating_query = int(query) * 2
            except Exception:
                rating_query = None

            location = [location] if location != "all" else list(db_col.keys())
            for i, loc in enumerate(location):
                location[i] = db_col[loc]

            current_candidates = candidates.copy()
            for loc in location:  # location is now an array of field indices
                if loc == db_col["authors"]:
                    # DB stores authors with commas changed to bars, so change query
                    if matchkind == REGEXP_MATCH:
                        q = query.replace(",", r"\|")
                    else:
                        q = query.replace(",", "|")
                elif loc == db_col["languages"]:
                    # Accept either a language code or a localized language
                    # name; map a name back to its code via lang_map().
                    q = canonicalize_lang(query)
                    if q is None:
                        lm = lang_map()
                        rm = {v.lower(): k for k, v in lm.iteritems()}
                        q = rm.get(query, query)
                else:
                    q = query

                for id_ in current_candidates:
                    item = self._data[id_]
                    if item is None:
                        continue

                    if not item[loc]:
                        if q == "false" and matchkind == CONTAINS_MATCH:
                            matches.add(item[0])
                        continue  # item is empty. No possible matches below
                    if q == "false" and matchkind == CONTAINS_MATCH:
                        # Field has something in it, so a false query does not match
                        continue

                    if q == "true" and matchkind == CONTAINS_MATCH:
                        # A whitespace-only string counts as empty.
                        if isinstance(item[loc], basestring):
                            if item[loc].strip() == "":
                                continue
                        matches.add(item[0])
                        continue

                    if col_datatype[loc] == "rating":  # get here if 'all' query
                        if rating_query and rating_query == int(item[loc]):
                            matches.add(item[0])
                        continue

                    try:  # a conversion below might fail
                        # relationals are not supported in 'all' queries
                        if col_datatype[loc] == "float":
                            if float(query) == item[loc]:
                                matches.add(item[0])
                            continue
                        if col_datatype[loc] == "int":
                            if int(query) == item[loc]:
                                matches.add(item[0])
                            continue
                    except Exception:
                        # A conversion threw an exception. Because of the type,
                        # no further match is possible
                        continue

                    if loc not in exclude_fields:  # time for text matching
                        if is_multiple_cols[loc] is not None:
                            vals = [v.strip() for v in item[loc].split(is_multiple_cols[loc])]
                        else:
                            vals = [item[loc]]  # make into list to make _match happy
                        if _match(q, vals, matchkind, use_primary_find_in_search=pref_use_primary_find_in_search):
                            matches.add(item[0])
                            continue
                # Remove already-matched books before the next location pass.
                current_candidates -= matches
        return matches