예제 #1
0
 def _parseAtom(self):
     """Build a parse-tree node from the term returned by ``self._get(_ATOM)``.

     The term is handed to the lexicon's ``parseTerms``.  When that yields
     no words, the raw term is recorded in ``self._ignored`` and ``None``
     is returned.  Otherwise the node is a ``PhraseNode`` (more than one
     word), a ``GlobNode`` (a single word for which the lexicon's
     ``isGlob`` is true) or an ``AtomNode`` (any other single word).  The
     node is wrapped in a ``NotNode`` when the term starts with "-".
     """
     raw = self._get(_ATOM)
     pieces = self._lexicon.parseTerms(raw)
     if not pieces:
         # The lexicon produced nothing for this term; remember it so the
         # caller can report which terms were ignored.
         self._ignored.append(raw)
         return None

     if len(pieces) > 1:
         node = ParseTree.PhraseNode(pieces)
     else:
         only = pieces[0]
         if self._lexicon.isGlob(only):
             node = ParseTree.GlobNode(only)
         else:
             node = ParseTree.AtomNode(only)

     # A leading "-" on the raw term negates whatever node was built.
     return ParseTree.NotNode(node) if raw[0] == "-" else node
def parseQuery(self, query):
    """Tokenize and parse *query*, returning the root parse-tree node.

    A non-``unicode`` query is decoded as UTF-8 and tokenized with
    ``_tokenizer_unicode_regex``; if that raises ``UnicodeDecodeError``
    the undecoded query is tokenized with ``_tokenizer_regex`` instead.

    Side effects: resets ``self._tokens``, ``self._tokentypes``,
    ``self._index`` and ``self._ignored``.

    Raises ``ParseTree.ParseError`` when parsing yields ``None``.
    """
    # Lexical analysis.
    try:
        # Rebinding ``query`` is deliberate: the error message below shows
        # the decoded form whenever decoding succeeded.
        query = query if isinstance(query, unicode) else query.decode('utf-8')
        found = _tokenizer_unicode_regex.findall(query)
    except UnicodeDecodeError:
        found = _tokenizer_regex.findall(query)
    self._tokens = found

    # Classify each token by its upper-cased text; anything that is not a
    # known keyword is treated as an atom.
    kinds = []
    for tok in found:
        kinds.append(_keywords.get(tok.upper(), _ATOM))

    # Terminate both parallel lists with the _EOF marker.
    found.append(_EOF)
    kinds.append(_EOF)
    self._tokentypes = kinds
    self._index = 0

    # Syntactical analysis.
    self._ignored = []  # Ignored words in the query, for parseQueryEx
    root = self._parseOrExpr()
    self._require(_EOF)
    if root is not None:
        return root
    raise ParseTree.ParseError("Query contains only common words: %s" %
                               repr(query))
예제 #3
0
 def _parseNotExpr(self):
     """Parse a term that may be preceded by a ``_NOT`` token.

     Returns the term wrapped in a ``NotNode`` when ``self._check(_NOT)``
     is true, the bare term otherwise, and ``None`` whenever
     ``_parseTerm`` returns ``None``.
     """
     negated = self._check(_NOT)
     term = self._parseTerm()
     if negated and term is not None:
         return ParseTree.NotNode(term)
     # Either no _NOT token was seen, or the term was None (only
     # stopwords); in both cases the term is returned as is.
     return term
예제 #4
0
 def _parseOrExpr(self):
     """Parse AND-expressions joined by ``_OR`` tokens.

     Returns ``None`` when no operand survives filtering, the operand
     itself when exactly one survives, and otherwise a
     ``ParseTree.OrNode`` over the surviving operands in source order.
     """
     operands = [self._parseAndExpr()]
     while self._check(_OR):
         operands.append(self._parseAndExpr())
     # Drop empty operands with a list comprehension instead of
     # filter(None, ...): on Python 3 filter() returns a lazy iterator,
     # which is always truthy and has no len(), so the checks below would
     # be wrong or raise TypeError.  On Python 2 the result is identical.
     operands = [node for node in operands if node]
     if not operands:
         return None  # Only stopwords
     if len(operands) == 1:
         return operands[0]
     return ParseTree.OrNode(operands)
예제 #5
0
 def _parseTerm(self):
     """Parse a parenthesised OR-expression or a run of adjacent atoms.

     If ``self._check(_LPAREN)`` is true, the result is whatever
     ``_parseOrExpr`` returns (possibly ``None``), after requiring a
     ``_RPAREN`` token.  Otherwise atoms are parsed while
     ``self._peek(_ATOM)`` is true; the result is ``None`` when no atom
     yields a node, the node itself when there is exactly one, and
     otherwise an ``AndNode`` with the non-negated nodes first and the
     ``NotNode`` instances last.

     Raises ``ParseTree.ParseError`` when every surviving node is a
     ``NotNode``.
     """
     if self._check(_LPAREN):
         tree = self._parseOrExpr()
         self._require(_RPAREN)
         return tree

     nodes = [self._parseAtom()]
     while self._peek(_ATOM):
         nodes.append(self._parseAtom())
     # List comprehension instead of filter(None, ...): on Python 3
     # filter() returns a lazy iterator, which is always truthy and
     # supports neither len() nor indexing.  Same result on Python 2.
     nodes = [node for node in nodes if node]
     if not nodes:
         return None  # Only stopwords

     # list.sort is stable, so keying on "is a NotNode" moves the negated
     # nodes to the end while keeping the original relative order inside
     # each group.
     nodes.sort(key=lambda node: isinstance(node, ParseTree.NotNode))
     if isinstance(nodes[0], ParseTree.NotNode):
         # Positive nodes sort first, so a NotNode here means there are
         # no positive nodes at all.
         raise ParseTree.ParseError(
             "a term must have at least one positive word")
     if len(nodes) == 1:
         return nodes[0]
     return ParseTree.AndNode(nodes)
예제 #6
0
 def _parseAndExpr(self):
     """Parse terms joined by ``_AND`` tokens.

     The first operand comes from ``_parseTerm``; each operand after an
     ``_AND`` token comes from ``_parseNotExpr``.  Operands that are
     ``None`` are skipped.  Returns ``None`` when there is no non-negated
     operand, the single operand when only one remains, and otherwise an
     ``AndNode`` listing the non-negated operands followed by the
     ``NotNode`` operands.
     """
     first = self._parseTerm()
     positives = [] if first is None else [first]
     negatives = []
     while self._check(_AND):
         operand = self._parseNotExpr()
         if operand is None:
             continue
         # Negated operands are kept apart so they end up after all the
         # positive ones.
         if isinstance(operand, ParseTree.NotNode):
             bucket = negatives
         else:
             bucket = positives
         bucket.append(operand)

     if not positives:
         return None  # Only stopwords
     operands = positives + negatives
     if len(operands) == 1:
         return operands[0]
     return ParseTree.AndNode(operands)
예제 #7
0
 def _require(self, tokentype):
     """Raise ``ParseTree.ParseError`` unless ``self._check(tokentype)`` is true.

     The error message names the required token type and the token found
     at ``self._index``.
     """
     if self._check(tokentype):
         return
     found = self._tokens[self._index]
     raise ParseTree.ParseError(
         "Token %r required, %r found" % (tokentype, found))