def _parseAtom(self):
    """Build the parse-tree node for one atom token.

    The term obtained from ``self._get(_ATOM)`` is split into words by the
    lexicon's ``parseTerms``.  When no words come back, the term is
    recorded in ``self._ignored`` and ``None`` is returned.

    Otherwise the result is a ``PhraseNode`` (more than one word), a
    ``GlobNode`` (a single word for which the lexicon's ``isGlob`` is
    true) or an ``AtomNode``.  The node is wrapped in a ``NotNode`` when
    the original term starts with "-".
    """
    term = self._get(_ATOM)
    lexicon = self._lexicon
    words = lexicon.parseTerms(term)
    if not words:
        # Nothing usable in this term; remember it for the caller.
        self._ignored.append(term)
        return None

    if len(words) > 1:
        node = ParseTree.PhraseNode(words)
    else:
        word = words[0]
        if lexicon.isGlob(word):
            node = ParseTree.GlobNode(word)
        else:
            node = ParseTree.AtomNode(word)

    # A leading dash on the raw term negates the whole atom.
    if term[0] == "-":
        return ParseTree.NotNode(node)
    return node
def parseQuery(self, query):
    """Parse ``query`` and return the root node of its parse tree.

    Lexical analysis: a non-unicode query is decoded as UTF-8 and
    tokenized with ``_tokenizer_unicode_regex``; if a
    ``UnicodeDecodeError`` occurs, the undecoded query is tokenized with
    ``_tokenizer_regex`` instead.  Each token is classified by looking up
    its upper-cased text in ``_keywords`` (default ``_ATOM``), and
    ``_EOF`` is appended to both the token list and the type list.

    Syntactical analysis: resets ``self._index`` and ``self._ignored``,
    parses an OR-expression and requires that ``_EOF`` follows it.

    Raises ``ParseTree.ParseError`` when the parse yields no tree.
    """
    # --- Lexical analysis ---
    try:
        query = query if isinstance(query, unicode) else query.decode('utf-8')
        token_list = _tokenizer_unicode_regex.findall(query)
    except UnicodeDecodeError:
        # Not valid UTF-8: tokenize the raw query as-is.
        token_list = _tokenizer_regex.findall(query)
    self._tokens = token_list

    # Classify every token before the end marker is added.
    token_kinds = []
    for token in token_list:
        token_kinds.append(_keywords.get(token.upper(), _ATOM))
    self._tokentypes = token_kinds

    # Terminate both parallel lists with the _EOF marker.
    token_list.append(_EOF)
    token_kinds.append(_EOF)
    self._index = 0

    # --- Syntactical analysis ---
    self._ignored = []  # Ignored words in the query, for parseQueryEx
    tree = self._parseOrExpr()
    self._require(_EOF)
    if tree is not None:
        return tree
    raise ParseTree.ParseError(
        "Query contains only common words: %s" % repr(query))
def _parseNotExpr(self):
    """Parse a term that may be preceded by a NOT token.

    Returns the term's tree, wrapped in a ``NotNode`` when
    ``self._check(_NOT)`` succeeded.  Returns ``None`` when the term
    itself parsed to ``None`` (only stopwords), negated or not.
    """
    negated = self._check(_NOT)
    term = self._parseTerm()
    if negated and term is not None:
        return ParseTree.NotNode(term)
    return term
def _parseOrExpr(self):
    """Parse one or more AND-expressions separated by OR tokens.

    Returns ``None`` when every operand parsed to nothing (only
    stopwords), the operand itself when exactly one survives, and a
    ``ParseTree.OrNode`` over the surviving operands otherwise.
    """
    operands = [self._parseAndExpr()]
    while self._check(_OR):
        operands.append(self._parseAndExpr())

    # Drop operands that parsed to nothing.  A list comprehension is used
    # instead of filter(None, ...) so the result is a real list whatever
    # filter() returns; the emptiness and len() checks below need that.
    operands = [node for node in operands if node]

    if not operands:
        return None  # Only stopwords
    if len(operands) == 1:
        return operands[0]
    return ParseTree.OrNode(operands)
def _parseTerm(self):
    """Parse a parenthesized OR-expression or a run of adjacent atoms.

    If ``self._check(_LPAREN)`` succeeds, an OR-expression is parsed, a
    ``_RPAREN`` token is required after it, and its tree (possibly
    ``None``) is returned.

    Otherwise one atom is parsed, followed by further atoms for as long
    as ``self._peek(_ATOM)`` is true.  Atoms that parsed to nothing are
    dropped.  Returns ``None`` if nothing is left, the single node if one
    is left, and otherwise a ``ParseTree.AndNode`` whose children are the
    positive nodes followed by the ``NotNode`` ones, each group in its
    original order.

    Raises ``ParseTree.ParseError`` when every remaining node is a
    ``NotNode``.
    """
    if self._check(_LPAREN):
        tree = self._parseOrExpr()
        self._require(_RPAREN)
        return tree

    nodes = [self._parseAtom()]
    while self._peek(_ATOM):
        nodes.append(self._parseAtom())

    # Drop atoms that parsed to nothing.  A comprehension (rather than
    # filter(None, ...)) guarantees a real list for the checks below.
    nodes = [node for node in nodes if node]
    if not nodes:
        return None  # Only stopwords

    # Order-preserving partition: positive nodes first, negated ones last.
    positives = [node for node in nodes
                 if not isinstance(node, ParseTree.NotNode)]
    negatives = [node for node in nodes
                 if isinstance(node, ParseTree.NotNode)]
    if not positives:
        raise ParseTree.ParseError(
            "a term must have at least one positive word")

    nodes = positives + negatives
    if len(nodes) == 1:
        return nodes[0]
    return ParseTree.AndNode(nodes)
def _parseAndExpr(self):
    """Parse a term followed by zero or more ``AND <not-expr>`` parts.

    Operands that parse to ``None`` are skipped.  ``NotNode`` operands
    that follow an AND are collected separately and placed after all
    positive operands.

    Returns ``None`` when there is no positive operand at all (any
    collected ``NotNode`` operands are discarded in that case), the
    operand itself when there is exactly one in total, and a
    ``ParseTree.AndNode`` otherwise.
    """
    positives = []
    negatives = []

    first = self._parseTerm()
    if first is not None:
        positives.append(first)

    while self._check(_AND):
        operand = self._parseNotExpr()
        if operand is None:
            continue
        if isinstance(operand, ParseTree.NotNode):
            bucket = negatives
        else:
            bucket = positives
        bucket.append(operand)

    if not positives:
        return None  # Only stopwords

    operands = positives + negatives
    if len(operands) == 1:
        return operands[0]
    return ParseTree.AndNode(operands)
def _require(self, tokentype):
    """Insist that ``self._check(tokentype)`` succeeds.

    Returns ``None`` on success.  On failure raises
    ``ParseTree.ParseError`` naming the required token type and the token
    found at ``self._tokens[self._index]``.
    """
    if self._check(tokentype):
        return
    found = self._tokens[self._index]
    raise ParseTree.ParseError(
        "Token %r required, %r found" % (tokentype, found))