Ejemplo n.º 1
0
    def _Snippet(self, query, field, *args):
        """Build a snippet of a document field around a query term.

        Args:
          query: Query node; its text, with surrounding double quotes
            stripped, is tokenized into the terms to look up.
          field: Query node whose text is the name of the field to snippet.
          *args: Ignored.

        Returns:
          The result of self._GenerateSnippet() for the first posting of the
          first term that belongs to this document, has positions and whose
          field value is non-empty. When the first term has no such posting,
          the leading DEFAULT_MAX_SNIPPET_LENGTH characters of the field
          value followed by '...', or '' if the field value is empty. None
          when the query text yields no terms.

        Raises:
          ExpressionEvaluationError: if this is a sort expression, or if the
            index schema reports the field as a NUMBER field.
        """
        field = query_parser.GetQueryNodeText(field)

        if self._is_sort_expression:
            call_text = ('snippet(' + query_parser.GetQueryNodeText(query) +
                         ', ' + field + ')')
            raise ExpressionEvaluationError(
                'Failed to parse sort expression \'' + call_text +
                '\': snippet() is not supported in sort expressions')

        if self._inverted_index.GetSchema().IsType(
                field, document_pb.FieldValue.NUMBER):
            call_text = ('snippet(' + query_parser.GetQueryNodeText(query) +
                         ', ' + field + ')')
            raise ExpressionEvaluationError(
                'Failed to parse field expression \'' + call_text +
                '\': snippet() argument 2 must be text')

        query_text = query_parser.GetQueryNodeText(query).strip('"')
        for term in self._tokenizer.TokenizeText(query_text):
            lookup = tokens.Token(chars=u'%s:%s' % (field, term.chars))
            for posting in self._inverted_index.GetPostingsForToken(lookup):
                # Only postings of this document that carry positions count.
                if posting.doc_id == self._doc_pb.id() and posting.positions:
                    text = self._GetFieldValue(
                        search_util.GetFieldInDocument(self._doc_pb, field))
                    if text:
                        words = []
                        for piece in (self._case_preserving_tokenizer
                                      .TokenizeText(text)):
                            words.append(piece.chars)
                        return self._GenerateSnippet(
                            words, posting.positions[0],
                            search_util.DEFAULT_MAX_SNIPPET_LENGTH)

            # No usable posting for this term: fall back to the head of the
            # field value.
            # NOTE(review): this returns during the first term's iteration,
            # so later terms are never examined -- confirm this is intended.
            fallback = self._GetFieldValue(
                search_util.GetFieldInDocument(self._doc_pb, field))
            if fallback:
                return '%s...' % fallback[
                    :search_util.DEFAULT_MAX_SNIPPET_LENGTH]
            return ''
    def _Eval(self, node):
        """Evaluate an expression node on the document.

        Args:
          node: The expression AST node representing an expression subtree.

        Returns:
          The result of the registered function for FN nodes, the result of
          the arithmetic helper for PLUS/MINUS/DIV/TIMES/NEG nodes, a float
          for INT and FLOAT literals, the text without surrounding double
          quotes for PHRASE nodes, and for NAME nodes either the document
          score ('_score') or the value of the named field.

        Raises:
          _ExpressionError: if a NAME node refers to a field that is not in
            the document, or if the node type is not handled here.
        """
        node_type = node.getType()

        # Token types are compared by value: identity (`is`) is not guaranteed
        # to hold between equal type constants.
        if node_type == ExpressionParser.FN:
            # FN nodes are dispatched on the function name in the node text.
            func = self._function_table[query_parser.GetQueryNodeText(node)]

            return func(*node.children)

        if node_type == ExpressionParser.PLUS:
            return self._EvalBinaryOp(lambda a, b: a + b, 'addition', node)
        if node_type == ExpressionParser.MINUS:
            return self._EvalBinaryOp(lambda a, b: a - b, 'subtraction', node)
        if node_type == ExpressionParser.DIV:
            return self._EvalBinaryOp(lambda a, b: a / b, 'division', node)
        if node_type == ExpressionParser.TIMES:
            return self._EvalBinaryOp(lambda a, b: a * b, 'multiplication',
                                      node)
        if node_type == ExpressionParser.NEG:
            return self._EvalUnaryOp(lambda a: -a, 'negation', node)

        if node_type in (ExpressionParser.INT, ExpressionParser.FLOAT):
            return float(query_parser.GetQueryNodeText(node))
        if node_type == ExpressionParser.PHRASE:
            return query_parser.GetQueryNodeText(node).strip('"')

        if node_type == ExpressionParser.NAME:
            name = query_parser.GetQueryNodeText(node)
            if name == '_score':
                return self._doc.score
            field = search_util.GetFieldInDocument(self._doc_pb, name)
            if field:
                return search_util.GetFieldValue(field)
            raise _ExpressionError('No field %s in document' % name)

        raise _ExpressionError('Unable to handle node %s' % node)
Ejemplo n.º 3
0
    def _Snippet(self, query, field, *args):
        """Build a snippet of a document field around a query term.

        Args:
          query: Query node; its text, with surrounding double quotes
            stripped, is tokenized into the terms to look up.
          field: Query node whose text is the name of the field to snippet.
          *args: Ignored.

        Returns:
          The result of self._GenerateSnippet() for the first posting of the
          first term that belongs to this document, has positions and whose
          field value is non-empty. When the first term has no such posting,
          the leading DEFAULT_MAX_SNIPPET_LENGTH characters of the field
          value followed by '...', or None if the field value is empty. Also
          None when the query text yields no terms.
        """
        field = query_parser.GetQueryNodeText(field)
        query_text = query_parser.GetQueryNodeText(query).strip('"')

        for term in self._tokenizer.TokenizeText(query_text):
            lookup = tokens.Token(chars=u'%s:%s' % (field, term.chars))
            for posting in self._inverted_index.GetPostingsForToken(lookup):
                # Only postings of this document that carry positions count.
                if posting.doc_id == self._doc_pb.id() and posting.positions:
                    text = search_util.GetFieldValue(
                        search_util.GetFieldInDocument(self._doc_pb, field))
                    if text:
                        words = []
                        for piece in (self._case_preserving_tokenizer
                                      .TokenizeText(text)):
                            words.append(piece.chars)
                        return self._GenerateSnippet(
                            words, posting.positions[0],
                            search_util.DEFAULT_MAX_SNIPPET_LENGTH)

            # No usable posting for this term: fall back to the head of the
            # field value.
            # NOTE(review): this returns during the first term's iteration,
            # so later terms are never examined -- confirm this is intended.
            fallback = search_util.GetFieldValue(
                search_util.GetFieldInDocument(self._doc_pb, field))
            if fallback:
                return '%s...' % fallback[
                    :search_util.DEFAULT_MAX_SNIPPET_LENGTH]
            return None
Ejemplo n.º 4
0
    def _Eval(self, node):
        """Compute the value of an expression subtree for this document.

        Args:
          node: The expression AST node at the root of the subtree.

        Returns:
          The result of the registered function when the node type is in the
          function table, the result of the arithmetic helper for
          PLUS/MINUS/DIV/TIMES/NEG nodes, a float for INT and FLOAT
          literals, the text without surrounding double quotes for PHRASE
          nodes, and for NAME nodes either the document score ('_score') or
          the value of the named field.

        Raises:
          _ExpressionError: if a NAME node refers to a field that is not in
            the document, or if the node type is not handled here.
        """
        node_type = node.getType()

        if node_type in self._function_table:
            return self._function_table[node_type](*node.children)

        # Binary arithmetic operators share one helper; only the operation
        # and its label differ.
        arithmetic = {
            ExpressionParser.PLUS: (lambda a, b: a + b, 'addition'),
            ExpressionParser.MINUS: (lambda a, b: a - b, 'subtraction'),
            ExpressionParser.DIV: (lambda a, b: a / b, 'division'),
            ExpressionParser.TIMES: (lambda a, b: a * b, 'multiplication'),
        }
        if node_type in arithmetic:
            operation, label = arithmetic[node_type]
            return self._EvalBinaryOp(operation, label, node)

        if node_type == ExpressionParser.NEG:
            return self._EvalUnaryOp(lambda a: -a, 'negation', node)

        if node_type in (ExpressionParser.INT, ExpressionParser.FLOAT):
            return float(query_parser.GetQueryNodeText(node))

        if node_type == ExpressionParser.PHRASE:
            return query_parser.GetQueryNodeText(node).strip('"')

        if node_type == ExpressionParser.NAME:
            name = query_parser.GetQueryNodeText(node)
            if name == '_score':
                return self._doc.score
            field = search_util.GetFieldInDocument(self._doc_pb, name)
            if not field:
                raise _ExpressionError('No field %s in document' % name)
            return self._GetFieldValue(field)

        raise _ExpressionError('Unable to handle node %s' % node)
Ejemplo n.º 5
0
    def SortKey(scored_doc):
      """Return the sort key for a document based on the request parameters.

      Args:
        scored_doc: Object whose `document` attribute is searched for the
          field named by sort_spec.sort_expression().

      Returns:
        default_value if the document has no such field; a float for number
        fields; the result of search_util.EpochTime() on the deserialized
        date for DATE fields; otherwise the field's raw string value.
      """
      field = search_util.GetFieldInDocument(
          scored_doc.document, sort_spec.sort_expression())
      if not field:
        return default_value

      field_value = field.value()
      string_val = field_value.string_value()
      value_type = field_value.type()
      if value_type in search_util.NUMBER_DOCUMENT_FIELD_TYPES:
        return float(string_val)
      # Compare the type constant by value; identity (`is`) is not guaranteed
      # to hold between equal type constants.
      if value_type == document_pb.FieldValue.DATE:
        return search_util.EpochTime(search_util.DeserializeDate(string_val))
      return string_val
Ejemplo n.º 6
0
    def _MatchField(self, field_query_node, match, document):
        """Dispatch a field match to the matcher for the field's value type.

        Args:
          field_query_node: Either the field name as a str, or a node whose
            getText() gives the field name.
          match: Passed through unchanged to the type-specific matcher.
          document: The document in which the field is looked up.

        Returns:
          False if the document has no such field; otherwise the result of
          _MatchTextField, _MatchNumericField or _MatchDateField.

        Raises:
          search_util.UnsupportedOnDevError: if the field's value type is
            not a text type, a number type or DATE.
        """
        if isinstance(field_query_node, str):
            field_name = field_query_node
        else:
            field_name = field_query_node.getText()

        field = search_util.GetFieldInDocument(document, field_name)
        if not field:
            return False

        value_type = field.value().type()
        if value_type in search_util.TEXT_DOCUMENT_FIELD_TYPES:
            matcher = self._MatchTextField
        elif value_type in search_util.NUMBER_DOCUMENT_FIELD_TYPES:
            matcher = self._MatchNumericField
        elif value_type == document_pb.FieldValue.DATE:
            matcher = self._MatchDateField
        else:
            raise search_util.UnsupportedOnDevError(
                'Matching to field type of field "%s" (type=%d) is unsupported on '
                'dev server' % (field.name(), value_type))

        return matcher(field, match, document)
Ejemplo n.º 7
0
    def _CheckInvalidNumericComparison(self, match, document):
        """Reject a comparison whose right-hand side names a number field.

        The text of `match` is looked up in `document` as a NUMBER field; if
        such a field exists, the right-hand side is a field reference rather
        than a numeric constant and the comparison is rejected.

        Args:
          match: The right hand side argument of the operator.
          document: The document we are checking for a match.

        Raises:
          ExpressionTreeException: when the text of `match` is the name of a
            NUMBER field in the document.
        """
        rhs_text = query_parser.GetQueryNodeText(match)
        numeric_field = search_util.GetFieldInDocument(
            document, rhs_text, document_pb.FieldValue.NUMBER)
        if not numeric_field:
            return

        raise ExpressionTreeException(
            'Expected numeric constant, found "' + rhs_text + '"')
    def _Snippet(self, query, field, *args):
        """Build a snippet of a document field around a query term.

        Args:
          query: Query node; its text, with surrounding double quotes
            stripped, is tokenized into the terms to look up.
          field: Query node whose text is the name of the field to snippet.
          *args: Ignored.

        Returns:
          The result of self._GenerateSnippet() for the first posting, in
          term order, that belongs to this document and has positions; None
          if no term has such a posting.
        """
        field = query_parser.GetQueryNodeText(field)
        query_text = query_parser.GetQueryNodeText(query).strip('"')

        for term in self._tokenizer.TokenizeText(query_text):
            lookup = tokens.Token(chars=u'%s:%s' % (field, term.chars))
            for posting in self._inverted_index.GetPostingsForToken(lookup):
                # Only postings of this document that carry positions count.
                if posting.doc_id == self._doc_pb.id() and posting.positions:
                    text = search_util.GetFieldValue(
                        search_util.GetFieldInDocument(self._doc_pb, field))
                    words = []
                    for piece in (self._case_preserving_tokenizer
                                  .TokenizeText(text)):
                        words.append(piece.chars)
                    return self._GenerateSnippet(
                        words, posting.positions[0],
                        search_util.DEFAULT_MAX_SNIPPET_LENGTH)

        return None
Ejemplo n.º 9
0
    def _Eval(self, node, return_type=None, allow_rank=True):
        """Compute the value of an expression subtree for this document.

        Args:
          node: The expression AST node at the root of the subtree.
          return_type: Forwarded to the function table entries, the numeric
            operator helpers and the field lookup; selects which field type
            to retrieve when a NAME is looked up in the document.
          allow_rank: Whether a reference to '_rank' may be evaluated.

        Returns:
          The result of the registered function when the node type is in the
          function table, the result of the numeric helper for
          PLUS/MINUS/DIV/TIMES/NEG nodes, a float for INT and FLOAT
          literals, the text without surrounding double quotes for PHRASE
          nodes, and for NAME nodes the document score ('_score'), the
          document's order id ('_rank') or the value of the named field.

        Raises:
          _ExpressionError: if a NAME node refers to a field that is not in
            the document, or if the node type is not handled here.
          QueryExpressionEvaluationError: if '_rank' is referenced while
            allow_rank is false.
        """
        node_type = node.getType()

        if node_type in self._function_table:
            return self._function_table[node_type](return_type,
                                                   *node.children)

        # Binary arithmetic operators share one helper; only the operation
        # and its label differ.
        arithmetic = {
            ExpressionParser.PLUS: (lambda a, b: a + b, 'addition'),
            ExpressionParser.MINUS: (lambda a, b: a - b, 'subtraction'),
            ExpressionParser.DIV: (lambda a, b: a / b, 'division'),
            ExpressionParser.TIMES: (lambda a, b: a * b, 'multiplication'),
        }
        if node_type in arithmetic:
            operation, label = arithmetic[node_type]
            return self._EvalNumericBinaryOp(operation, label, node,
                                             return_type)

        if node_type == ExpressionParser.NEG:
            return self._EvalNumericUnaryOp(lambda a: -a, 'negation', node,
                                            return_type)

        if node_type in (ExpressionParser.INT, ExpressionParser.FLOAT):
            return float(query_parser.GetQueryNodeText(node))

        if node_type == ExpressionParser.PHRASE:
            return query_parser.GetQueryNodeText(node).strip('"')

        if node_type == ExpressionParser.NAME:
            name = query_parser.GetQueryNodeText(node)
            if name == '_score':
                return self._doc.score
            if name == '_rank':
                if not allow_rank:
                    raise QueryExpressionEvaluationError(
                        'SortSpec order must be descending in \'_rank\'')
                return self._doc.document.order_id()

            field = search_util.GetFieldInDocument(self._doc_pb, name,
                                                   return_type)
            if not field:
                raise _ExpressionError('No field %s in document' % name)
            return self._GetFieldValue(field)

        raise _ExpressionError('Unable to handle node %s' % node)