Example #1
    def get_keypair_matches(self, location, query, candidates):
        # Search a colon-separated-pairs field (e.g. identifiers) with a
        # 'key:value' query; both sides accept the usual match-kind prefixes.
        matches = set([])
        if query.find(':') >= 0:
            q = [q.strip() for q in query.split(':')]
            if len(q) != 2:
                raise ParseException(
                 _('Invalid query format for colon-separated search: {0}').format(query))
            (keyq, valq) = q
            keyq_mkind, keyq = self._matchkind(keyq)
            valq_mkind, valq = self._matchkind(valq)
        else:
            keyq = keyq_mkind = ''
            valq_mkind, valq = self._matchkind(query)

        loc = self.field_metadata[location]['rec_index']
        split_char = self.field_metadata[location]['is_multiple'].get(
                'cache_to_list', ',')
        for id_ in candidates:
            item = self._data[id_]
            if item is None:
                continue

            if item[loc] is None:
                if valq == 'false':
                    matches.add(id_)
                continue

            add_if_nothing_matches = valq == 'false'
            pairs = [p.strip() for p in item[loc].split(split_char)]
            for pair in pairs:
                parts = pair.split(':')
                if len(parts) != 2:
                    continue
                # one-element lists, because _match() expects a list of values
                k = parts[:1]
                v = parts[1:]
                if keyq and not _match(keyq, k, keyq_mkind,
                                       use_primary_find_in_search=pref_use_primary_find_in_search):
                    continue
                if valq:
                    if valq == 'true':
                        if not v:
                            continue
                    elif valq == 'false':
                        if v:
                            add_if_nothing_matches = False
                            continue
                    elif not _match(valq, v, valq_mkind,
                                    use_primary_find_in_search=pref_use_primary_find_in_search):
                        continue
                matches.add(id_)

            if add_if_nothing_matches:
                matches.add(id_)
        return matches
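
The method above leans on calibre internals (self._data, field_metadata, the _match helper). A rough, self-contained sketch of the same key:value filtering over a delimited cache string, with hypothetical names and a plain substring test standing in for _match:

# A hypothetical, self-contained rendering of the pair-filtering idea above,
# with a plain substring test standing in for calibre's _match() helper.
def keypair_matches(records, query, sep=','):
    # records: {book_id: 'isbn:123,doi:10.1/x', ...}
    # query:   'key:value' or just 'value'; a value of 'true'/'false' tests
    #          whether a matching pair carries a non-empty value.
    if ':' in query:
        keyq, _, valq = query.partition(':')
    else:
        keyq, valq = '', query
    matches = set()
    for id_, raw in records.items():
        if raw is None:
            if valq == 'false':
                matches.add(id_)
            continue
        add_if_nothing_matches = valq == 'false'
        for pair in (p.strip() for p in raw.split(sep)):
            k, colon, v = pair.partition(':')
            if not colon:
                continue                          # not a key:value pair
            if keyq and keyq not in k:
                continue                          # key side did not match
            if valq:
                if valq == 'true':                # needs a non-empty value
                    if not v:
                        continue
                elif valq == 'false':             # needs an empty value
                    if v:
                        add_if_nothing_matches = False
                        continue
                elif valq not in v:               # substring stand-in for _match()
                    continue
            matches.add(id_)
        if add_if_nothing_matches:
            matches.add(id_)
    return matches

# keypair_matches({1: 'isbn:123,doi:10.1/x', 2: 'doi:'}, 'isbn:123') -> {1}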
Example #2
    def get_matches(self, location, query, candidates=None,
            allow_recursion=True):
        # If candidates is not None, it must not be modified. Changing its
        # value will break query optimization in the search parser
        matches = set([])
        if candidates is None:
            candidates = self.universal_set()
        if len(candidates) == 0:
            return matches
        if location not in self.all_search_locations:
            return matches

        if len(location) > 2 and location.startswith('@') and \
                    location[1:] in self.db_prefs['grouped_search_terms']:
            location = location[1:]

        if query and query.strip():
            # get metadata key associated with the search term. Eliminates
            # dealing with plurals and other aliases
            original_location = location
            location = self.field_metadata.search_term_to_field_key(icu_lower(location.strip()))
            # grouped search terms
            if isinstance(location, list):
                if allow_recursion:
                    if query.lower() == 'false':
                        invert = True
                        query = 'true'
                    else:
                        invert = False
                    for loc in location:
                        c = candidates.copy()
                        m = self.get_matches(loc, query,
                                candidates=c, allow_recursion=False)
                        matches |= m
                        c -= m
                        if len(c) == 0:
                            break
                    if invert:
                        matches = self.universal_set() - matches
                    return matches
                raise ParseException(_('Recursive query group detected: {0}').format(query))

            # apply the limit if appropriate
            if location == 'all' and prefs['limit_search_columns'] and \
                            prefs['limit_search_columns_to']:
                terms = set([])
                for l in prefs['limit_search_columns_to']:
                    l = icu_lower(l.strip())
                    if l and l != 'all' and l in self.all_search_locations:
                        terms.add(l)
                if terms:
                    c = candidates.copy()
                    for l in terms:
                        try:
                            m = self.get_matches(l, query,
                                candidates=c, allow_recursion=allow_recursion)
                            matches |= m
                            c -= m
                            if len(c) == 0:
                                break
                        except:
                            pass
                    return matches

            if location in self.field_metadata:
                fm = self.field_metadata[location]
                # take care of dates special case
                if fm['datatype'] == 'datetime' or \
                        (fm['datatype'] == 'composite' and
                         fm['display'].get('composite_sort', '') == 'date'):
                    return self.get_dates_matches(location, query.lower(), candidates)

                # take care of numbers special case
                if fm['datatype'] in ('rating', 'int', 'float') or \
                        (fm['datatype'] == 'composite' and
                         fm['display'].get('composite_sort', '') == 'number'):
                    return self.get_numeric_matches(location, query.lower(), candidates)

                if fm['datatype'] == 'bool':
                    return self.get_bool_matches(location, query, candidates)

                # take care of the 'count' operator for is_multiples
                if fm['is_multiple'] and \
                        len(query) > 1 and query.startswith('#') and \
                        query[1:2] in '=<>!':
                    vf = lambda item, loc=fm['rec_index'], \
                                ms=fm['is_multiple']['cache_to_list']:\
                            len(item[loc].split(ms)) if item[loc] is not None else 0
                    return self.get_numeric_matches(location, query[1:],
                                                    candidates, val_func=vf)

                # special case: colon-separated fields such as identifiers. isbn
                # is a special case within the case
                if fm.get('is_csp', False):
                    if location == 'identifiers' and original_location == 'isbn':
                        return self.get_keypair_matches('identifiers',
                                                   '=isbn:'+query, candidates)
                    return self.get_keypair_matches(location, query, candidates)

            # check for user categories
            if len(location) >= 2 and location.startswith('@'):
                return self.get_user_category_matches(location[1:], query.lower(),
                                                      candidates)
            # everything else, or 'all' matches
            matchkind, query = self._matchkind(query)

            if not isinstance(query, unicode):
                query = query.decode('utf-8')

            db_col = {}
            exclude_fields = []  # fields to not check when matching against text.
            col_datatype = []
            is_multiple_cols = {}
            for x in range(len(self.FIELD_MAP)):
                col_datatype.append('')
            for x in self.field_metadata:
                if x.startswith('@'):
                    continue
                if len(self.field_metadata[x]['search_terms']):
                    db_col[x] = self.field_metadata[x]['rec_index']
                    if self.field_metadata[x]['datatype'] not in \
                            ['composite', 'text', 'comments', 'series', 'enumeration']:
                        exclude_fields.append(db_col[x])
                    col_datatype[db_col[x]] = self.field_metadata[x]['datatype']
                    is_multiple_cols[db_col[x]] = \
                        self.field_metadata[x]['is_multiple'].get('cache_to_list', None)

            try:
                rating_query = int(query) * 2
            except:
                rating_query = None

            location = [location] if location != 'all' else list(db_col.keys())
            for i, loc in enumerate(location):
                location[i] = db_col[loc]

            current_candidates = candidates.copy()
            for loc in location:  # location is now an array of field indices
                if loc == db_col['authors']:
                    # DB stores authors with commas changed to bars, so change query
                    if matchkind == REGEXP_MATCH:
                        q = query.replace(',', r'\|')
                    else:
                        q = query.replace(',', '|')
                elif loc == db_col['languages']:
                    q = canonicalize_lang(query)
                    if q is None:
                        lm = lang_map()
                        rm = {v.lower():k for k,v in lm.iteritems()}
                        q = rm.get(query, query)
                else:
                    q = query

                for id_ in current_candidates:
                    item = self._data[id_]
                    if item is None:
                        continue

                    if not item[loc]:
                        if q == 'false' and matchkind == CONTAINS_MATCH:
                            matches.add(item[0])
                        continue     # item is empty. No possible matches below
                    if q == 'false' and matchkind == CONTAINS_MATCH:
                        # Field has something in it, so a false query does not match
                        continue

                    if q == 'true' and matchkind == CONTAINS_MATCH:
                        if isinstance(item[loc], basestring):
                            if item[loc].strip() == '':
                                continue
                        matches.add(item[0])
                        continue

                    if col_datatype[loc] == 'rating':  # get here if 'all' query
                        if rating_query and rating_query == int(item[loc]):
                            matches.add(item[0])
                        continue

                    try:  # a conversion below might fail
                        # relationals are not supported in 'all' queries
                        if col_datatype[loc] == 'float':
                            if float(query) == item[loc]:
                                matches.add(item[0])
                            continue
                        if col_datatype[loc] == 'int':
                            if int(query) == item[loc]:
                                matches.add(item[0])
                            continue
                    except:
                        # A conversion threw an exception. Because of the type,
                        # no further match is possible
                        continue

                    if loc not in exclude_fields:  # time for text matching
                        if is_multiple_cols[loc] is not None:
                            vals = [v.strip() for v in item[loc].split(is_multiple_cols[loc])]
                        else:
                            vals = [item[loc]]  # make into list to make _match happy
                        if _match(q, vals, matchkind,
                                  use_primary_find_in_search=pref_use_primary_find_in_search):
                            matches.add(item[0])
                            continue
                current_candidates -= matches
        return matches
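
Note how get_matches keeps shrinking the set of ids still to be examined (c -= m in the grouped-search and limited-column loops, current_candidates -= matches in the per-column loop), so later, usually more expensive, matchers only see books that have not matched yet. A standalone sketch of that narrowing pattern, with hypothetical matcher callables rather than calibre API:

def union_of_matches(matchers, query, candidates):
    # matchers: callables (query, candidates) -> set of matched ids.
    # Mirrors the 'c -= m' / 'current_candidates -= matches' loops above:
    # each id is seen by later matchers only while it is still unmatched,
    # and the loop stops early once nothing is left to check.
    matches = set()
    remaining = set(candidates)   # never mutate the caller's candidate set
    for matcher in matchers:
        m = matcher(query, remaining)
        matches |= m
        remaining -= m
        if not remaining:
            break
    return matches

# union_of_matches([lambda q, c: {1}, lambda q, c: {2}], 'x', {1, 2, 3}) -> {1, 2}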
Example #3
    def get_matches(self, location, query):
        location = location.lower().strip()
        if location == 'formats':
            location = 'format'

        matchkind = CONTAINS_MATCH
        if len(query) > 1:
            if query.startswith('\\'):
                query = query[1:]
            elif query.startswith('='):
                matchkind = EQUALS_MATCH
                query = query[1:]
            elif query.startswith('~'):
                matchkind = REGEXP_MATCH
                query = query[1:]
        if matchkind != REGEXP_MATCH:  # leave case in regexps because it can be significant e.g. \S \W \D
            query = query.lower()

        if location not in self.USABLE_LOCATIONS:
            return set()
        matches = set()
        all_locs = set(self.USABLE_LOCATIONS) - {'all'}
        locations = all_locs if location == 'all' else [location]
        q = {
            'affiliate': lambda x: x.affiliate,
            'description': lambda x: x.description.lower(),
            'drm': lambda x: not x.drm_free_only,
            'enabled': lambda x: not is_disabled(x),
            'format': lambda x: ','.join(x.formats).lower(),
            'headquarters': lambda x: x.headquarters.lower(),
            'name': lambda x: x.name.lower(),
        }
        q['formats'] = q['format']
        upf = prefs['use_primary_find_in_search']
        for sr in self.srs:
            for locvalue in locations:
                accessor = q[locvalue]
                if query == 'true':
                    if locvalue in ('affiliate', 'drm', 'enabled'):
                        if accessor(sr) == True:  # noqa
                            matches.add(sr)
                    elif accessor(sr) is not None:
                        matches.add(sr)
                    continue
                if query == 'false':
                    if locvalue in ('affiliate', 'drm', 'enabled'):
                        if accessor(sr) == False:  # noqa
                            matches.add(sr)
                    elif accessor(sr) is None:
                        matches.add(sr)
                    continue
                # this is bool, so can't match below
                if locvalue in ('affiliate', 'drm', 'enabled'):
                    continue
                try:
                    # Can't separate authors because comma is used for name sep and author sep
                    # Exact match might not get what you want. For that reason, turn author
                    # exactmatch searches into contains searches.
                    if locvalue == 'name' and matchkind == EQUALS_MATCH:
                        m = CONTAINS_MATCH
                    else:
                        m = matchkind

                    if locvalue == 'format':
                        vals = accessor(sr).split(',')
                    else:
                        vals = [accessor(sr)]
                    if _match(query, vals, m, use_primary_find_in_search=upf):
                        matches.add(sr)
                        break
                except ValueError:  # Unicode errors
                    import traceback
                    traceback.print_exc()
        return matches
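
This store search is driven by a small dictionary mapping each searchable location to an accessor lambda; 'true'/'false' queries are answered by inspecting what the accessor returns, and the boolean locations (affiliate, drm, enabled) never fall through to text matching. A minimal sketch of that dispatch pattern, using a hypothetical record type rather than the real store search-result objects:

from collections import namedtuple

# Hypothetical store record and accessor table mirroring the q dict above.
Store = namedtuple('Store', 'name formats drm_free_only')
accessors = {
    'name':   lambda s: s.name.lower(),
    'format': lambda s: ','.join(s.formats).lower(),
    'drm':    lambda s: not s.drm_free_only,
}

def simple_store_matches(stores, location, query):
    acc = accessors[location]
    matched = set()
    for store in stores:
        value = acc(store)
        if location == 'drm':                 # boolean location: true/false only
            if query in ('true', 'false') and (query == 'true') == bool(value):
                matched.add(store)
        elif query in value:                  # plain contains match on text fields
            matched.add(store)
    return matched

store = Store(name='Acme Books', formats=('EPUB', 'PDF'), drm_free_only=True)
assert simple_store_matches([store], 'format', 'epub') == {store}
assert simple_store_matches([store], 'drm', 'false') == {store}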
Example #4
    def get_matches(self, location, query):
        location = location.lower().strip()
        if location == 'authors':
            location = 'author'
        elif location == 'formats':
            location = 'format'

        matchkind = CONTAINS_MATCH
        if len(query) > 1:
            if query.startswith('\\'):
                query = query[1:]
            elif query.startswith('='):
                matchkind = EQUALS_MATCH
                query = query[1:]
            elif query.startswith('~'):
                matchkind = REGEXP_MATCH
                query = query[1:]
        if matchkind != REGEXP_MATCH:  # leave case in regexps because it can be significant e.g. \S \W \D
            query = query.lower()

        if location not in self.USABLE_LOCATIONS:
            return set([])
        matches = set([])
        all_locs = set(self.USABLE_LOCATIONS) - set(['all'])
        locations = all_locs if location == 'all' else [location]
        q = {
            'author': lambda x: x.author.lower(),
            'format': attrgetter('formats'),
            'title': lambda x: x.title.lower(),
        }
        for x in ('author', 'format'):
            q[x + 's'] = q[x]
        upf = prefs['use_primary_find_in_search']
        for sr in self.srs:
            for locvalue in locations:
                accessor = q[locvalue]
                if query == 'true':
                    if accessor(sr) is not None:
                        matches.add(sr)
                    continue
                if query == 'false':
                    if accessor(sr) is None:
                        matches.add(sr)
                    continue
                try:
                    # Can't separate authors because comma is used for name sep and author sep
                    # Exact match might not get what you want. For that reason, turn author
                    # exactmatch searches into contains searches.
                    if locvalue == 'author' and matchkind == EQUALS_MATCH:
                        m = CONTAINS_MATCH
                    else:
                        m = matchkind

                    vals = [accessor(sr)]
                    if _match(query, vals, m, use_primary_find_in_search=upf):
                        matches.add(sr)
                        break
                except ValueError:  # Unicode errors
                    import traceback
                    traceback.print_exc()
        return matches
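
Examples #3 through #6 share the same preamble that inspects the query's first character to choose the match kind: '=' for an equality match, '~' for a regular-expression match, '\' to escape a literal leading character, anything else a contains match. Pulled out on its own, and assuming simple integer stand-ins for the match-kind constants, it looks roughly like this:

# Hypothetical stand-ins for calibre's match-kind constants.
CONTAINS_MATCH, EQUALS_MATCH, REGEXP_MATCH = 0, 1, 2

def parse_matchkind(query):
    # '=foo' is an equality match, '~foo' a regexp match, '\=foo' a literal
    # contains match for '=foo', anything else a plain contains match.
    matchkind = CONTAINS_MATCH
    if len(query) > 1:
        if query.startswith('\\'):
            query = query[1:]
        elif query.startswith('='):
            matchkind, query = EQUALS_MATCH, query[1:]
        elif query.startswith('~'):
            matchkind, query = REGEXP_MATCH, query[1:]
    if matchkind != REGEXP_MATCH:
        query = query.lower()      # case only matters inside regexps
    return matchkind, query

assert parse_matchkind('=Tolstoy') == (EQUALS_MATCH, 'tolstoy')
assert parse_matchkind(r'~\d{4}') == (REGEXP_MATCH, r'\d{4}')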
Example #5
    def get_matches(self, location, query):
        location = location.lower().strip()
        if location == 'formats':
            location = 'format'

        matchkind = CONTAINS_MATCH
        if len(query) > 1:
            if query.startswith('\\'):
                query = query[1:]
            elif query.startswith('='):
                matchkind = EQUALS_MATCH
                query = query[1:]
            elif query.startswith('~'):
                matchkind = REGEXP_MATCH
                query = query[1:]
        if matchkind != REGEXP_MATCH:  # leave case in regexps because it can be significant e.g. \S \W \D
            query = query.lower()

        if location not in self.USABLE_LOCATIONS:
            return set([])
        matches = set([])
        all_locs = set(self.USABLE_LOCATIONS) - set(['all'])
        locations = all_locs if location == 'all' else [location]
        q = {
             'affiliate': lambda x: x.affiliate,
             'description': lambda x: x.description.lower(),
             'drm': lambda x: not x.drm_free_only,
             'enabled': lambda x: not is_disabled(x),
             'format': lambda x: ','.join(x.formats).lower(),
             'headquarters': lambda x: x.headquarters.lower(),
             'name': lambda x: x.name.lower(),
        }
        q['formats'] = q['format']
        upf = prefs['use_primary_find_in_search']
        for sr in self.srs:
            for locvalue in locations:
                accessor = q[locvalue]
                if query == 'true':
                    if locvalue in ('affiliate', 'drm', 'enabled'):
                        if accessor(sr) == True:
                            matches.add(sr)
                    elif accessor(sr) is not None:
                        matches.add(sr)
                    continue
                if query == 'false':
                    if locvalue in ('affiliate', 'drm', 'enabled'):
                        if accessor(sr) == False:
                            matches.add(sr)
                    elif accessor(sr) is None:
                        matches.add(sr)
                    continue
                # this is bool, so can't match below
                if locvalue in ('affiliate', 'drm', 'enabled'):
                    continue
                try:
                    # Can't separate authors because comma is used for name sep and author sep
                    # Exact match might not get what you want. For that reason, turn author
                    # exactmatch searches into contains searches.
                    if locvalue == 'name' and matchkind == EQUALS_MATCH:
                        m = CONTAINS_MATCH
                    else:
                        m = matchkind

                    if locvalue == 'format':
                        vals = accessor(sr).split(',')
                    else:
                        vals = [accessor(sr)]
                    if _match(query, vals, m, use_primary_find_in_search=upf):
                        matches.add(sr)
                        break
                except ValueError: # Unicode errors
                    import traceback
                    traceback.print_exc()
        return matches
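
Every example on this page delegates the actual comparison to a _match(query, vals, matchkind, use_primary_find_in_search=...) helper that is not shown here. A hypothetical sketch of the contract it appears to provide; the real helper also handles primary (accent-insensitive) finds and localized values:

import re

# Hypothetical stand-ins for calibre's match-kind constants.
CONTAINS_MATCH, EQUALS_MATCH, REGEXP_MATCH = 0, 1, 2

def match_any(query, values, matchkind):
    # Test the query against every candidate value and report whether any
    # of them satisfies the requested match kind.
    for value in values:
        value = value.lower()
        if matchkind == EQUALS_MATCH:
            if query == value:
                return True
        elif matchkind == REGEXP_MATCH:
            if re.search(query, value, re.IGNORECASE):
                return True
        elif query in value:              # CONTAINS_MATCH
            return True
    return False

# match_any('pdf', ['EPUB', 'PDF'], CONTAINS_MATCH) -> True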
Example #6
    def get_matches(self, location, query):
        location = location.lower().strip()
        if location == 'authors':
            location = 'author'
        elif location == 'formats':
            location = 'format'

        matchkind = CONTAINS_MATCH
        if len(query) > 1:
            if query.startswith('\\'):
                query = query[1:]
            elif query.startswith('='):
                matchkind = EQUALS_MATCH
                query = query[1:]
            elif query.startswith('~'):
                matchkind = REGEXP_MATCH
                query = query[1:]
        if matchkind != REGEXP_MATCH:  # leave case in regexps because it can be significant e.g. \S \W \D
            query = query.lower()

        if location not in self.USABLE_LOCATIONS:
            return set([])
        matches = set([])
        all_locs = set(self.USABLE_LOCATIONS) - {'all'}
        locations = all_locs if location == 'all' else [location]
        q = {
             'author': lambda x: x.author.lower(),
             'format': attrgetter('formats'),
             'title': lambda x: x.title.lower(),
        }
        for x in ('author', 'format'):
            q[x+'s'] = q[x]
        upf = prefs['use_primary_find_in_search']
        for sr in self.srs:
            for locvalue in locations:
                accessor = q[locvalue]
                if query == 'true':
                    if accessor(sr) is not None:
                        matches.add(sr)
                    continue
                if query == 'false':
                    if accessor(sr) is None:
                        matches.add(sr)
                    continue
                try:
                    # Can't separate authors because comma is used for name sep and author sep
                    # Exact match might not get what you want. For that reason, turn author
                    # exactmatch searches into contains searches.
                    if locvalue == 'author' and matchkind == EQUALS_MATCH:
                        m = CONTAINS_MATCH
                    else:
                        m = matchkind

                    vals = [accessor(sr)]
                    if _match(query, vals, m, use_primary_find_in_search=upf):
                        matches.add(sr)
                        break
                except ValueError:  # Unicode errors
                    import traceback
                    traceback.print_exc()
        return matches
Example #7
    def get_matches(self, location, query, candidates=None, allow_recursion=True):
        # If candidates is not None, it must not be modified. Changing its
        # value will break query optimization in the search parser
        matches = set([])
        if candidates is None:
            candidates = self.universal_set()
        if len(candidates) == 0:
            return matches
        if location not in self.all_search_locations:
            return matches

        if len(location) > 2 and location.startswith("@") and location[1:] in self.db_prefs["grouped_search_terms"]:
            location = location[1:]

        if query and query.strip():
            # get metadata key associated with the search term. Eliminates
            # dealing with plurals and other aliases
            original_location = location
            location = self.field_metadata.search_term_to_field_key(icu_lower(location.strip()))
            # grouped search terms
            if isinstance(location, list):
                if allow_recursion:
                    if query.lower() == "false":
                        invert = True
                        query = "true"
                    else:
                        invert = False
                    for loc in location:
                        c = candidates.copy()
                        m = self.get_matches(loc, query, candidates=c, allow_recursion=False)
                        matches |= m
                        c -= m
                        if len(c) == 0:
                            break
                    if invert:
                        matches = self.universal_set() - matches
                    return matches
                raise ParseException(_("Recursive query group detected: {0}").format(query))

            # apply the limit if appropriate
            if location == "all" and prefs["limit_search_columns"] and prefs["limit_search_columns_to"]:
                terms = set([])
                for l in prefs["limit_search_columns_to"]:
                    l = icu_lower(l.strip())
                    if l and l != "all" and l in self.all_search_locations:
                        terms.add(l)
                if terms:
                    c = candidates.copy()
                    for l in terms:
                        try:
                            m = self.get_matches(l, query, candidates=c, allow_recursion=allow_recursion)
                            matches |= m
                            c -= m
                            if len(c) == 0:
                                break
                        except:
                            pass
                    return matches

            if location in self.field_metadata:
                fm = self.field_metadata[location]
                # take care of dates special case
                if fm["datatype"] == "datetime" or (
                    fm["datatype"] == "composite" and fm["display"].get("composite_sort", "") == "date"
                ):
                    return self.get_dates_matches(location, query.lower(), candidates)

                # take care of numbers special case
                if fm["datatype"] in ("rating", "int", "float") or (
                    fm["datatype"] == "composite" and fm["display"].get("composite_sort", "") == "number"
                ):
                    return self.get_numeric_matches(location, query.lower(), candidates)

                if fm["datatype"] == "bool":
                    return self.get_bool_matches(location, query, candidates)

                # take care of the 'count' operator for is_multiples
                if fm["is_multiple"] and len(query) > 1 and query.startswith("#") and query[1:1] in "=<>!":
                    vf = (
                        lambda item, loc=fm["rec_index"], ms=fm["is_multiple"]["cache_to_list"]: len(
                            item[loc].split(ms)
                        )
                        if item[loc] is not None
                        else 0
                    )
                    return self.get_numeric_matches(location, query[1:], candidates, val_func=vf)

                # special case: colon-separated fields such as identifiers. isbn
                # is a special case within the case
                if fm.get("is_csp", False):
                    if location == "identifiers" and original_location == "isbn":
                        return self.get_keypair_matches("identifiers", "=isbn:" + query, candidates)
                    return self.get_keypair_matches(location, query, candidates)

            # check for user categories
            if len(location) >= 2 and location.startswith("@"):
                return self.get_user_category_matches(location[1:], query.lower(), candidates)
            # everything else, or 'all' matches
            matchkind, query = self._matchkind(query)

            if not isinstance(query, unicode):
                query = query.decode("utf-8")

            db_col = {}
            exclude_fields = []  # fields to not check when matching against text.
            col_datatype = []
            is_multiple_cols = {}
            for x in range(len(self.FIELD_MAP)):
                col_datatype.append("")
            for x in self.field_metadata:
                if x.startswith("@"):
                    continue
                if len(self.field_metadata[x]["search_terms"]):
                    db_col[x] = self.field_metadata[x]["rec_index"]
                    if self.field_metadata[x]["datatype"] not in [
                        "composite",
                        "text",
                        "comments",
                        "series",
                        "enumeration",
                    ]:
                        exclude_fields.append(db_col[x])
                    col_datatype[db_col[x]] = self.field_metadata[x]["datatype"]
                    is_multiple_cols[db_col[x]] = self.field_metadata[x]["is_multiple"].get("cache_to_list", None)

            try:
                rating_query = int(query) * 2
            except:
                rating_query = None

            location = [location] if location != "all" else list(db_col.keys())
            for i, loc in enumerate(location):
                location[i] = db_col[loc]

            current_candidates = candidates.copy()
            for loc in location:  # location is now an array of field indices
                if loc == db_col["authors"]:
                    # DB stores authors with commas changed to bars, so change query
                    if matchkind == REGEXP_MATCH:
                        q = query.replace(",", r"\|")
                    else:
                        q = query.replace(",", "|")
                elif loc == db_col["languages"]:
                    q = canonicalize_lang(query)
                    if q is None:
                        lm = lang_map()
                        rm = {v.lower(): k for k, v in lm.iteritems()}
                        q = rm.get(query, query)
                else:
                    q = query

                for id_ in current_candidates:
                    item = self._data[id_]
                    if item is None:
                        continue

                    if not item[loc]:
                        if q == "false" and matchkind == CONTAINS_MATCH:
                            matches.add(item[0])
                        continue  # item is empty. No possible matches below
                    if q == "false" and matchkind == CONTAINS_MATCH:
                        # Field has something in it, so a false query does not match
                        continue

                    if q == "true" and matchkind == CONTAINS_MATCH:
                        if isinstance(item[loc], basestring):
                            if item[loc].strip() == "":
                                continue
                        matches.add(item[0])
                        continue

                    if col_datatype[loc] == "rating":  # get here if 'all' query
                        if rating_query and rating_query == int(item[loc]):
                            matches.add(item[0])
                        continue

                    try:  # a conversion below might fail
                        # relationals are not supported in 'all' queries
                        if col_datatype[loc] == "float":
                            if float(query) == item[loc]:
                                matches.add(item[0])
                            continue
                        if col_datatype[loc] == "int":
                            if int(query) == item[loc]:
                                matches.add(item[0])
                            continue
                    except:
                        # A conversion threw an exception. Because of the type,
                        # no further match is possible
                        continue

                    if loc not in exclude_fields:  # time for text matching
                        if is_multiple_cols[loc] is not None:
                            vals = [v.strip() for v in item[loc].split(is_multiple_cols[loc])]
                        else:
                            vals = [item[loc]]  # make into list to make _match happy
                        if _match(q, vals, matchkind, use_primary_find_in_search=pref_use_primary_find_in_search):
                            matches.add(item[0])
                            continue
                current_candidates -= matches
        return matches
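
In the full dispatcher (Examples #2 and #7), a query such as #>2 on a multiple-valued column is routed to get_numeric_matches with a val_func that counts the stored items rather than reading them. A standalone sketch of that counting idea, with hypothetical names and the relational parsing inlined instead of delegated to get_numeric_matches:

import operator

# The value fed to the numeric comparison is the number of items stored in
# the field (len of the split cache string), with an empty field counting as 0.
_RELOPS = {'=': operator.eq, '!=': operator.ne,
           '>': operator.gt, '<': operator.lt,
           '>=': operator.ge, '<=': operator.le}

def count_matches(records, query, separator=','):
    # records: {book_id: 'tag1,tag2,...' or None}; query: e.g. '#>2'
    body = query.lstrip('#')
    op = body[:2] if body[:2] in _RELOPS else body[:1]
    relop, threshold = _RELOPS[op], int(body[len(op):])
    matched = set()
    for id_, raw in records.items():
        count = len(raw.split(separator)) if raw is not None else 0
        if relop(count, threshold):
            matched.add(id_)
    return matched

# count_matches({1: 'a,b,c', 2: 'a', 3: None}, '#>1') -> {1}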