예제 #1
0
    def _MatchTextField(self, field, match, document):
        """Decide whether a textual field satisfies a query tree node.

        Args:
            field: Field under test; its name() and value() are consulted.
            match: Query tree node to evaluate against the field.
            document: Document owning the field; its id() is looked up among
                the postings returned for the field name and query token.

        Returns:
            True when the field satisfies the node, False otherwise
            (including node types that are not handled here).

        Raises:
            ExpressionTreeException: If the node is a negation.
        """

        def strip_global(node):
            # An EQ/HAS node whose first operand is GLOBAL is replaced by its
            # second operand; every other node is returned unchanged.
            op = node.getType()
            if op not in (QueryParser.EQ, QueryParser.HAS):
                return node
            operands = node.children
            if len(operands) < 2:
                return node
            if operands[0].getType() == QueryParser.GLOBAL:
                return operands[1]
            return node

        node_type = match.getType()

        # A fuzzy node is evaluated through its first child.
        if node_type == QueryParser.FUZZY:
            return self._MatchTextField(field, match.getChild(0), document)

        if node_type == QueryParser.VALUE:
            if query_parser.IsPhrase(match):
                return self._MatchPhrase(field, match, document)

            if field.value().type() == document_pb.FieldValue.ATOM:
                # Atom fields are compared for exact equality with the
                # node text, without tokenization.
                actual = field.value().string_value()
                return actual == query_parser.GetQueryNodeText(match)

            tokens = self._parser.TokenizeText(
                query_parser.GetQueryNodeText(match))

            # A query that tokenizes to nothing matches.
            if not tokens:
                return True

            if len(tokens) > 1:
                # Each token is wrapped in its own TEXT node and matched
                # recursively; all of them have to match.
                def as_text_node(tok):
                    return query_parser.CreateQueryNode(
                        tok.chars, QueryParser.TEXT)

                return all(
                    self._MatchTextField(field, as_text_node(tok), document)
                    for tok in tokens)

            # Single token: the document matches if it appears in the
            # postings for this field name and token.
            postings = self._PostingsForFieldToken(
                field.name(), tokens[0].chars)
            posted_ids = [posting.doc_id for posting in postings]
            return document.id() in posted_ids

        if node_type == QueryParser.CONJUNCTION:
            return all(
                self._MatchTextField(field, strip_global(sub), document)
                for sub in match.children)

        if node_type == QueryParser.DISJUNCTION:
            return any(
                self._MatchTextField(field, strip_global(sub), document)
                for sub in match.children)

        if node_type == QueryParser.NEGATION:
            raise ExpressionTreeException('Unable to compare \"' +
                                          field.name() + '\" with negation')

        # Any other node type is treated as a non-match.
        return False
    def _WalkQueryTree(self, query_node, doc, query_field=None, level=0):
        """Recursively match doc against the query tree rooted at query_node.

        Node types are compared by value (==, !=, in) rather than by identity:
        `is` only succeeds when both operands are the very same object, which
        is not guaranteed for equal constants.

        Args:
            query_node: Query tree node to evaluate; read through getType(),
                getText() and children.
            doc: Document being matched. It is used as a container of field
                names (membership test and iteration) and is handed to
                _MatchField.
            query_field: Name of the field the match is restricted to, or
                None to try every field in doc.
            level: Recursion depth, forwarded to _Debug.

        Returns:
            True if doc satisfies the sub-query rooted at query_node, False
            otherwise (including node types that are not handled here).
        """
        query_type = query_node.getType()
        query_text = query_node.getText()

        self._Debug('_WalkQueryTree: query type: %r, field: %r, text: %r' %
                    (query_type, query_field, query_text),
                    level=level)

        if query_type == QueryParser.CONJUNCTION:
            # Every child has to match; all() stops at the first failure.
            return all(
                self._WalkQueryTree(self.ExtractGlobalEq(child),
                                    doc,
                                    query_field,
                                    level=level + 1)
                for child in query_node.children)

        # One matching child is enough. When none matches, control continues
        # below and ends at the fallthrough debug message and `return False`.
        if query_type == QueryParser.DISJUNCTION and any(
                self._WalkQueryTree(self.ExtractGlobalEq(child),
                                    doc,
                                    query_field,
                                    level=level + 1)
                for child in query_node.children):
            return True

        if query_type == QueryParser.NEGATION:
            child = query_node.children[0]
            # NOTE(review): the message text looks copied from the
            # missing-field branch; what is logged here is the node type and
            # the negated child. Text kept unchanged on purpose.
            self._Debug(('No such field so no match: field: %r, children: %r' %
                         (query_type, child)), level)
            return not self._WalkQueryTree(
                self.ExtractGlobalEq(child), doc, query_field, level=level + 1)

        elif query_type == QueryParser.HAS:
            # Unless the left operand is GLOBAL, it names the field that the
            # right operand is restricted to.
            if query_node.children[0].getType() != QueryParser.GLOBAL:
                query_field = query_node.children[0].getText()
                if query_field not in doc:
                    self._Debug(
                        ('No such field so no match: field: %r' % query_field),
                        level)
                    return False
            return self._WalkQueryTree(query_node.children[1],
                                       doc,
                                       query_field,
                                       level=level + 1)

        elif query_type in (QueryParser.VALUE, QueryParser.TEXT):
            if query_parser.IsPhrase(query_node):
                query_text = query_parser.GetQueryNodeTextUnicode(query_node)
            if query_field is not None:
                return self._MatchField(doc,
                                        query_field,
                                        query_text,
                                        level=level)

            # No field restriction: any field of the document may match.
            if any(
                    self._MatchField(doc, field_name, query_text, level=level)
                    for field_name in doc):
                return True

        elif query_type in query_parser.COMPARISON_TYPES:
            query_field = query_node.children[0].getText()
            query_text = query_node.children[1].getText()
            if query_field is not None:
                if query_field not in doc:
                    self._Debug(
                        ('No such field so no match: field: %r' % query_field),
                        level)
                    return False
                return self._MatchField(doc,
                                        query_field,
                                        query_text,
                                        query_type,
                                        level=level)

            # No field name on the left-hand side: try the comparison
            # against every field of the document.
            if any(
                    self._MatchField(doc,
                                     field_name,
                                     query_text,
                                     query_type,
                                     level=level)
                    for field_name in doc):
                return True

        self._Debug(
            'Fallthrough at %s returning false, query_node.children: %s' %
            (query_text, [n.getText() for n in query_node.children]), level)
        return False