Exemple #1
0
def _get_xquery(catalog, query=None, **kw):
    # Case 1: a query is given
    if query is not None:
        return catalog._query2xquery(query)

    # Case 2: nothing has been specified, return everything
    if not kw:
        return Query('')

    # Case 3: build the query from the keyword parameters
    metadata = catalog._metadata
    fields = catalog._fields
    xqueries = []
    for name, value in kw.iteritems():
        # If name is a field not yet indexed, return nothing
        if name not in metadata:
            warn_not_indexed(name)
            return Query()

        # Ok
        info = metadata[name]
        prefix = info['prefix']
        field_cls = _get_field_cls(name, fields, info)
        query = _make_PhraseQuery(field_cls, value, prefix)
        xqueries.append(query)

    return Query(OP_AND, xqueries)
Exemple #2
0
 def to_query(self, query, default_op=Query.OP_AND):
     """Coerce "query" into a Query object.

     A list is converted element by element (recursively) and the results
     are combined with default_op; a string is wrapped in a Query; a Query
     instance is returned unchanged.  Any other type yields None.
     """
     if isinstance(query, list):
         converted = [self.to_query(item) for item in query]
         return Query(default_op, converted)
     if isinstance(query, basestring):
         return Query(query)
     if isinstance(query, Query):
         return query
     return None
    def _parse_query_term(self, name, prefix, value):
        """Build the term query for one property.

        A list value becomes an OP_OR of the queries of its elements.  Any
        other value is turned into a single term: _PREFIX_FULL_VALUE plus
        the prefix plus str(value) when a prefix is given, _PREFIX_NONE plus
        str(value) otherwise.
        """
        if not isinstance(value, list):
            lead = _PREFIX_FULL_VALUE + prefix if prefix else _PREFIX_NONE
            return Query(lead + str(value))

        alternatives = []
        for word in value:
            alternatives.append(self._parse_query_term(name, prefix, word))
        return Query(Query.OP_OR, alternatives)
Exemple #4
0
    def __init__(self, *args, **kwargs):
        """Create the query, encoding unicode terms to byte strings first.

        The positional arguments are forwarded to Query.__init__.  A unicode
        argument is encoded; inside a list or tuple argument every unicode
        element is encoded and every other element (an already encoded
        string, a sub-query, ...) is passed through unchanged.  A list or
        tuple argument is always forwarded as a list.

        @keyword encoding: specify the encoding manually (default: value of config.charset)
        """
        self.encoding = kwargs.get('encoding', config.charset)
        encoding = self.encoding

        def encode(term):
            # Only unicode needs encoding: calling .encode() on anything
            # else either fails (objects without that method) or triggers
            # an implicit ASCII decode (non-ASCII byte strings).
            if isinstance(term, unicode):
                return term.encode(encoding)
            return term

        nargs = []
        for term in args:
            if isinstance(term, (list, tuple)):
                term = [encode(t) for t in term]
            else:
                term = encode(term)
            nargs.append(term)

        # NOTE(review): 'encoding' is read with get(), so it is still part
        # of kwargs when they are forwarded -- confirm the base class
        # accepts that keyword.
        Query.__init__(self, *nargs, **kwargs)
Exemple #5
0
    def _parse_query_value_range(self, name, info, value):
        if len(value) != 2:
            raise TypeError('Only tuples of size 2 have a defined meaning. '
                            'Did you mean to pass a list instead?')

        start, end = value
        return Query(Query.OP_VALUE_RANGE, info['number'],
                     self._convert_value(info, start),
                     self._convert_value(info, end))
Exemple #6
0
    def _parse_query_xapian(self, query_str):
        """Parse "query_str" with xapian's query parser.

        Phrase, boolean, love/hate and wildcard syntax are enabled and ''
        is passed as the last argument.  When the parser rejects the string
        a warning is logged and an empty Query() is returned instead.
        """
        flags = (QueryParser.FLAG_PHRASE
                 | QueryParser.FLAG_BOOLEAN
                 | QueryParser.FLAG_LOVEHATE
                 | QueryParser.FLAG_WILDCARD)
        try:
            return xapian.QueryParser.parse_query(self, query_str, flags, '')
        except xapian.QueryParserError as exception:
            logging.warning('Invalid query string: ' + exception.get_msg())
            return Query()
Exemple #7
0
def phrase(terms, prefix=None, language=None, window=3):
    """Build an OP_PHRASE query from "terms".

    terms    -- a list of terms, or a string that is split on whitespace
    prefix   -- if given, every term becomes 'prefix:term'
    language -- if given, every term is run through xapian.Stem(language)
                and gets a leading 'Z' (applied after the prefix step)
    window   -- passed to Query together with the terms
    """
    if isinstance(terms, basestring):
        terms = terms.split()
    if prefix:
        terms = ['%s:%s' % (prefix, t) for t in terms]
    if language:
        stem = xapian.Stem(language)
        terms = ['Z%s' % stem(t) for t in terms]
    # The debugging "print terms" that used to be here wrote to stdout on
    # every call; it has been removed.
    return Query(Query.OP_PHRASE, terms, window)
    def parse_query(self, query_dict, query_string):
        """Combine a property dictionary and a query string into one query.

        The query string (when not None) is parsed with
        self._parse_query_xapian.  Every dictionary entry whose name is in
        _QUERY_TERM_MAP or _QUERY_VALUE_MAP is converted with the matching
        helper; other names are logged as a warning and skipped.  All parts
        are combined with OP_AND; with no parts at all, Query('') is used.
        """
        logging.debug('parse_query %r %r', query_dict, query_string)
        properties = dict(query_dict)
        parts = []

        if query_string is not None:
            parts.append(self._parse_query_xapian(str(query_string)))

        for key, wanted in properties.items():
            if key in _QUERY_TERM_MAP:
                part = self._parse_query_term(
                    key, _QUERY_TERM_MAP[key], wanted)
            elif key in _QUERY_VALUE_MAP:
                part = self._parse_query_value(
                    key, _QUERY_VALUE_MAP[key], wanted)
            else:
                logging.warning('Unknown term: %r=%r', key, wanted)
                continue
            parts.append(part)

        if not parts:
            parts = [Query('')]

        logging.debug('queries: %r', [str(part) for part in parts])
        return Query(Query.OP_AND, parts)
Exemple #9
0
def _make_PhraseQuery(field_cls, value, prefix):
    """Build an OP_PHRASE query for "value".

    The value is indexed into a scratch Document with _index (language
    'en'), then the terms of that document are read back, ordered by
    (position, term), and used as the words of the phrase.
    """
    # XXX It's too complex (slow), we must use xapian
    #     Problem => _index_cjk
    scratch = Document()
    # XXX Language = 'en' by default
    _index(scratch, field_cls, value, prefix, 'en')

    # Collect (position, term) pairs so that sorting restores word order
    positioned = []
    for item in scratch:
        term = item.term
        positioned.extend((position, term) for position in item.positer)

    ordered_terms = [term for _, term in sorted(positioned)]
    return Query(OP_PHRASE, ordered_terms)
    def _parse_query_value(self, name, info, value):
        if isinstance(value, list):
            subqueries = [self._parse_query_value(name, info, word)
                for word in value]
            return Query(Query.OP_OR, subqueries)

        elif isinstance(value, tuple):
            return self._parse_query_value_range(name, info, value)

        elif isinstance(value, dict):
            # compatibility option for timestamp: {'start': 0, 'end': 1}
            start = value.get('start', 0)
            end = value.get('end', sys.maxint)
            return self._parse_query_value_range(name, info, (start, end))

        else:
            return self._parse_query_value_range(name, info, (value, value))
Exemple #11
0
def elite(terms, prefix=None, window=10):
    """Build an OP_ELITE_SET query from "terms".

    A string is split on whitespace first; when "prefix" is given every
    term becomes 'prefix:term'.  "window" is passed to Query as is.
    """
    words = terms.split() if isinstance(terms, basestring) else terms
    if prefix:
        words = ['%s:%s' % (prefix, word) for word in words]
    return Query(Query.OP_ELITE_SET, words, window)
Exemple #12
0
    def search(self, query=None, **kw):
        """Narrow the current results with a further query.

        The new criteria are converted with _get_xquery, combined with
        self._xquery through OP_AND, and wrapped in a SearchResults bound to
        the same catalog.
        """
        catalog = self._catalog
        extra = _get_xquery(catalog, query, **kw)
        operands = [self._xquery, extra]
        return SearchResults(catalog, Query(Query.OP_AND, operands))
Exemple #13
0
def get_docs(db):
    """Return the document of every match of the Query('') query on "db".

    A first, empty match set is requested only to read the upper bound of
    the number of matches; a second one of that size is then fetched.
    """
    enquire = Enquire(db)
    enquire.set_query(Query(''))
    probe = enquire.get_mset(0, 0)
    upper_bound = probe.get_matches_upper_bound()

    documents = []
    for match in enquire.get_mset(0, upper_bound):
        documents.append(match.get_document())
    return documents
Exemple #14
0
 def operator(self, query, op):
     """Combine this object's query with another one through "op".

     The other query is first passed through querify(); the result is a
     copy of self whose query is Query(op, self.query, other).
     """
     other = querify(query)
     combined = Query(op, self.query, other)
     return self.copy(query=combined)
Exemple #15
0
 def scale(self, factor):
     """Return a copy of self whose query is wrapped in OP_SCALE_WEIGHT
     together with "factor".
     """
     scaled = Query(Query.OP_SCALE_WEIGHT, self.query, factor)
     return self.copy(query=scaled)
Exemple #16
0
def elite(db, terms, window=10, **kwargs):
    """Return a Search on "db" for an OP_ELITE_SET query over "terms".

    "window" goes to the Query; extra keyword arguments go to Search.
    """
    elite_query = Query(Query.OP_ELITE_SET, terms, window)
    return Search(db, elite_query, **kwargs)
Exemple #17
0
def phrase(db, terms, window=10, **kwargs):
    """Return a Search on "db" for an OP_PHRASE query over "terms".

    "window" goes to the Query; extra keyword arguments go to Search.
    """
    phrase_query = Query(Query.OP_PHRASE, terms, window)
    return Search(db, phrase_query, **kwargs)
Exemple #18
0
 def search(self, query=None, **kw):
     """Narrow the current results with a further query.

     The new criteria are converted with _get_xquery, combined with
     self._xquery through OP_AND, and returned as a new instance of the
     same class bound to the same catalog.
     """
     extra = _get_xquery(self._catalog, query, **kw)
     operands = [self._xquery, extra]
     narrowed = Query(Query.OP_AND, operands)
     return self.__class__(self._catalog, narrowed)
Exemple #19
0
    def _query2xquery(self, query):
        """take a "itools" query and return a "xapian" query

        Dispatches on the exact class of "query":

        - AllQuery: Query('').
        - PhraseQuery: phrase query built by _make_PhraseQuery; the field
          metadata must have a 'prefix'.
        - RangeQuery: OP_VALUE_GE / OP_VALUE_LE / OP_VALUE_RANGE on the
          field's 'value' entry, or Query('') when both limits are None.
        - StartQuery: values >= query.value, minus values >= the next
          string after it; Query('') when the encoded value is empty.
        - TextQuery: the unicode value is translated with TRANSLATE_MAP,
          encoded, and parsed by a QueryParser bound to self._db.
        - _MultipleQuery with a single atom: the converted atom.
        - _AndQuery / _OrQuery: OP_AND / OP_OR of the converted atoms.
        - NotQuery: OP_AND_NOT of Query('') and the converted sub-query.

        For the field based queries, a name missing from self._metadata
        triggers warn_not_indexed and an empty Query() is returned.

        Raises TypeError if query.name is not a str (or, for TextQuery, if
        query.value is not unicode), ValueError if a PhraseQuery/TextQuery
        field has no 'prefix' or a RangeQuery targets a multiple field, and
        AttributeError if a RangeQuery/StartQuery field has no 'value'.

        When no case matches, the method falls off the end and implicitly
        returns None.
        """
        # Exact class, not isinstance: subclasses of the query classes
        # below are not matched by the "is" tests
        query_class = type(query)
        fields = self._fields
        metadata = self._metadata

        # All Query
        if query_class is AllQuery:
            return Query('')

        # PhraseQuery, the field must be indexed
        if query_class is PhraseQuery:
            name = query.name
            if type(name) is not str:
                raise TypeError, "unexpected '%s'" % type(name)
            # If there is a problem => an empty result
            if name not in metadata:
                warn_not_indexed(name)
                return Query()
            info = metadata[name]
            # A missing 'prefix' entry is reported as "not indexed"
            try:
                prefix = info['prefix']
            except KeyError:
                raise ValueError, 'the field "%s" must be indexed' % name
            field_cls = _get_field_cls(name, fields, info)
            return _make_PhraseQuery(field_cls, query.value, prefix)

        # RangeQuery, the field must be stored
        if query_class is RangeQuery:
            name = query.name
            if type(name) is not str:
                raise TypeError, "unexpected '%s'" % type(name)
            # If there is a problem => an empty result
            if name not in metadata:
                warn_not_indexed(name)
                return Query()

            info = metadata[name]
            # A missing 'value' entry is reported as "not stored"
            value = info.get('value')
            if value is None:
                raise AttributeError, MSG_NOT_STORED.format(name=name)
            field_cls = _get_field_cls(name, fields, info)
            if field_cls.multiple:
                error = 'range-query not supported on multiple fields'
                raise ValueError, error

            # Encode only the limits that are given; None means "open"
            left = query.left
            if left is not None:
                left = _encode_simple_value(field_cls, left)

            right = query.right
            if right is not None:
                right = _encode_simple_value(field_cls, right)

            # Case 1: no limits, return everything
            if left is None and right is None:
                return Query('')

            # Case 2: left limit only
            if right is None:
                return Query(OP_VALUE_GE, value, left)

            # Case 3: right limit only
            if left is None:
                return Query(OP_VALUE_LE, value, right)

            # Case 4: left and right
            return Query(OP_VALUE_RANGE, value, left, right)

        # StartQuery, the field must be stored
        if query_class is StartQuery:
            name = query.name
            if type(name) is not str:
                raise TypeError, "unexpected '%s'" % type(name)
            # If there is a problem => an empty result
            if name not in metadata:
                warn_not_indexed(name)
                return Query()

            info = metadata[name]
            # A missing 'value' entry is reported as "not stored"
            value_nb = info.get('value')
            if value_nb is None:
                raise AttributeError, MSG_NOT_STORED.format(name=name)
            field_cls = _get_field_cls(name, fields, info)

            value = query.value
            value = _encode(field_cls, value)

            if value:
                # good = {x / x >= value}
                good = Query(OP_VALUE_GE, value_nb, value)

                # Construct the variable end_value:
                # end_value = the word "after" value: toto => totp

                # Delete the '\xff' at the end of value: a 255 byte cannot
                # be incremented by the chr(ord(...) + 1) step below
                end_value = value
                while end_value and ord(end_value[-1]) == 255:
                    end_value = end_value[:-1]

                # Normal case: end_value is not empty
                if end_value:
                    # The word after
                    end_value = end_value[:-1] + chr(ord(end_value[-1]) + 1)

                    # bad = {x / x >= end_value}
                    bad = Query(OP_VALUE_GE, value_nb, end_value)

                    # Return {x / x in good but x not in bad}
                    return Query(OP_AND_NOT, good, bad)
                # If end_value is empty (value was made of '\xff' only)
                else:
                    # Return {x / x in good}
                    return good
            else:
                # If value == '', we return everything
                return Query('')

        # TextQuery, the field must be indexed
        if query_class is TextQuery:
            name = query.name
            if type(name) is not str:
                raise TypeError, "unexpected %s for 'name'" % type(name)
            # If there is a problem => an empty result
            if name not in metadata:
                warn_not_indexed(name)
                return Query()

            info = metadata[name]
            field_cls = _get_field_cls(name, fields, info)
            # A missing 'prefix' entry is reported as "not indexed"
            try:
                prefix = info['prefix']
            except KeyError:
                raise ValueError, 'the field "%s" must be indexed' % name

            # Remove accents from the value
            value = query.value
            if type(value) is not unicode:
                raise TypeError, "unexpected %s for 'value'" % type(value)
            value = value.translate(TRANSLATE_MAP)

            # Let xapian parse the encoded text against this database
            qp = QueryParser()
            qp.set_database(self._db)
            return qp.parse_query(_encode(field_cls, value), TQ_FLAGS, prefix)

        # Short alias for the recursive conversions below
        i2x = self._query2xquery
        # Multiple query with single atom
        if isinstance(query, _MultipleQuery) and len(query.atoms) == 1:
            return i2x(query.atoms[0])

        # And
        if query_class is _AndQuery:
            return Query(OP_AND, [ i2x(q) for q in query.atoms ])

        # Or
        if query_class is _OrQuery:
            return Query(OP_OR, [ i2x(q) for q in query.atoms ])

        # Not: everything (Query('')) minus the converted sub-query
        if query_class is NotQuery:
            return Query(OP_AND_NOT, Query(''), i2x(query.query))
Exemple #20
0
 def operator(self, query, op):
     """Combine this object's query with another one through "op".

     The other query is first passed through self._db.querify() using
     self._language; the result is a copy of self whose query is
     Query(op, self.query, other).
     """
     other = self._db.querify(query, language=self._language)
     combined = Query(op, self.query, other)
     return self.copy(query=combined)
Exemple #21
0
 def and_elite(self, queries):
     """Pass an OP_ELITE_SET query built from "queries" to self.and_()
     and return its result.
     """
     elite_set = Query(Query.OP_ELITE_SET, queries)
     return self.and_(elite_set)
Exemple #22
0
    def search(self, query=None, **kw):
        """Narrow the current results with a further query.

        The new criteria are converted with _get_xquery against the
        database catalog, combined with self._xquery through OP_AND, and
        returned as a new instance of the same class bound to the same
        database.
        """
        database = self._database
        extra = _get_xquery(database.catalog, query, **kw)
        operands = [self._xquery, extra]
        narrowed = Query(Query.OP_AND, operands)
        return self.__class__(database, narrowed)
Exemple #23
0
def near(db, terms, window=10, **kwargs):
    """Return a Search on "db" for an OP_NEAR query over "terms".

    "window" goes to the Query; extra keyword arguments go to Search.
    """
    near_query = Query(Query.OP_NEAR, terms, window)
    return Search(db, near_query, **kwargs)
Exemple #24
0
 def or_elite(self, *queries):
     """Pass an OP_ELITE_SET query built from the positional arguments to
     self.or_() and return its result.

     The arguments are handed to Query as one tuple.
     """
     elite_set = Query(Query.OP_ELITE_SET, queries)
     return self.or_(elite_set)