def query(s, indexFile, btree, normDict, cache):
    """Run a search query by dispatching it to the matching query handler.

    The query is trimmed and passed through ``preProcessString``. A query
    wrapped in quotes goes to ``phraseQuery``. Anything else is parsed with
    ``bool_expr_ast``: a plain-string parse result is handed to
    ``freeTextQuery`` and any other result to ``boolQuery``.

    Args:
        s: Raw query string. ``None`` or an empty string yields an empty set.
        indexFile: Passed through unchanged to the selected handler.
        btree: Passed through unchanged to the selected handler.
        normDict: Passed through unchanged to the selected handler.
        cache: Passed through unchanged to the selected handler.

    Returns:
        An empty ``set`` when the query is missing, is empty after
        preprocessing, or is the lone wildcard ``"*"``; otherwise whatever
        the selected handler returns.
    """
    # Reject missing/empty input up front. A truthiness test also covers
    # None, which used to blow up inside len().
    if not s:
        return set()

    # Calling strip() as a method (rather than str.strip(s)) accepts any
    # string type that provides it, not only exact ``str`` instances.
    s = preProcessString(s.strip())

    # Preprocessing may leave nothing behind; a bare "*" is treated the same
    # way as an empty query.
    if not s or s == "*":
        return set()

    # Quoted queries are phrase queries.
    if isWrappedInQuotes(s):
        return phraseQuery(s, indexFile, btree, normDict, cache)

    parsed = bool_expr_ast(s)
    if isinstance(parsed, str):
        # The parser returned a plain string, so run it as free text.
        return freeTextQuery(parsed, indexFile, btree, normDict, cache)
    return boolQuery(parsed, indexFile, btree, normDict, cache)
def query(s):
    """Dispatch a query string to the phrase, free-text or boolean handler.

    The string is stripped of surrounding whitespace first. A query wrapped
    in quotes is answered by ``phraseQuery``; otherwise the result of
    ``bool_expr_ast`` decides: a plain string goes to ``freeTextQuery``,
    anything else to ``boolQuery``.
    """
    trimmed = str.strip(s)

    # Quoted input short-circuits before any boolean parsing happens.
    if isWrappedInQuotes(trimmed):
        return phraseQuery(trimmed)

    parsed = bool_expr_ast(trimmed)
    handler = freeTextQuery if isinstance(parsed, str) else boolQuery
    return handler(parsed)
def getQueryType(query):
    """Classify a query string as one of the ``*_QUERY`` kinds.

    Checks are applied in this order:

    1. Starts and ends with a double quote -> ``PHRASE_QUERY``.
    2. ``bool_expr_ast(query)`` yields a tuple -> ``BOOLEAN_QUERY``.
    3. Scanning left to right, whichever of space / ``*`` appears first
       decides between ``FREE_TEXT_QUERY`` and ``WILDCARD_QUERY``.
    4. Otherwise -> ``ONE_WORD_QUERY``.

    Args:
        query: The query string to classify.

    Returns:
        One of the module-level query-kind constants listed above.
    """
    # startswith/endswith instead of indexing, so an empty query no longer
    # raises IndexError on query[0].
    if query.startswith('"') and query.endswith('"'):
        return PHRASE_QUERY

    if isinstance(bool_expr_ast(query), tuple):
        return BOOLEAN_QUERY

    # The first space or asterisk encountered wins, so "a* b" is a wildcard
    # query while "a b*" is a free-text query.
    for ch in query:
        if ch == " ":
            return FREE_TEXT_QUERY
        if ch == "*":
            return WILDCARD_QUERY

    return ONE_WORD_QUERY
def parse_query(self, query):
    """Classify ``query`` and evaluate it with the matching handler.

    Returns:
        A ``(query_type, result)`` pair, where ``query_type`` is whatever
        ``self.determine_query`` reported and ``result`` comes from
        ``self.bq``, ``self.owq`` or ``self.ftq``.
    """
    kind = self.determine_query(query)

    if kind == 'BQ':
        # Boolean queries are parsed into an AST before evaluation.
        ast = bool_expr_ast(query)
        return kind, self.bq(ast)

    if kind == 'OWQ':
        return kind, self.owq(query)

    if kind != 'FTQ':
        # Every remaining type has its first and last character dropped
        # before being run as free text (presumably the surrounding quotes
        # of a phrase query -- confirm against determine_query).
        query = query[1:-1]
    return kind, self.ftq(query)
def processQuery(query):
    """Classify a raw search query and hand it to the matching processor.

    An IDF value is computed for every stemmed term first; the resulting
    ``term_to_idf`` mapping is passed to whichever processor is selected:

    * contains ``*`` and exactly one word -> ``processWQ``
    * contains ``*`` and any other word count -> not handled, a newline is
      written to stdout
    * no stemmed terms -> a newline is written to stdout
    * exactly one word -> ``processOWQ``
    * contains a double quote -> ``processPQ``
    * ``bool_expr_ast`` returns something other than the query itself ->
      ``processBQ``
    * otherwise -> ``processFTQ``

    Args:
        query: The raw query string.

    Returns:
        None. Results are produced by the called processor.
    """
    # Stem once and reuse the result; the query was previously stemmed a
    # second time just to check whether any terms were left.
    stemmed = stemmedQuery(query)

    term_to_idf = {}
    for term in stemmed:
        term_to_idf[term] = calculate_idf_for_term(term)

    # Count whole words with the wildcard markers removed.
    term_count = len(re.findall(r'\b\w+\b', query.replace('*', '')))

    if '*' in query:
        # A '*' anywhere in the query marks it as a wildcard query.
        if term_count == 1:
            processWQ(query, term_to_idf)
        else:
            # Wildcard phrase queries are not handled.
            sys.stdout.write("\n")
        return

    if not stemmed:
        # Search query has zero terms.
        sys.stdout.write("\n")
        return

    if term_count == 1:
        processOWQ(stemmed[0], term_to_idf)
        return

    if '"' in query:
        processPQ(stemmed, term_to_idf)
        return

    # Parse once and reuse: the parser hands back the query unchanged when
    # it is not a boolean expression.
    ast = bool_expr_ast(query)
    if query != ast:
        processBQ(ast, term_to_idf)
    else:
        processFTQ(stemmed, term_to_idf)
def parseBooleanQuery(query):
    """Evaluate a boolean query string.

    The ``AND`` / ``OR`` operators are masked, the rest of the query is
    lower-cased, stop-word filtered and stemmed, and the operators are then
    restored. The operator-free word list is run through
    ``parseFreeTextQuery``, the normalised query is parsed with
    ``bool_expr_ast``, and both results are combined via
    ``removeDocsFromData(data, getDocsFromBool(ast))``.

    Args:
        query: Boolean query text using upper-case ``AND`` / ``OR``.

    Returns:
        Whatever ``removeDocsFromData`` returns.
    """
    import re  # local import: keeps the block independent of file-level imports

    # Mask the operators so lower-casing, stop-word removal and stemming
    # leave them alone. Match whole words only: a plain substring replace
    # also rewrote upper-case terms such as "ORANGE" or "SANDY".
    # NOTE(review): a literal term "1and" / "1or" typed by the user is still
    # indistinguishable from a masked operator.
    query = re.sub(r"\bAND\b", "1and", query)
    query = re.sub(r"\bOR\b", "1or", query)

    query = query.lower()
    query = removeStopWords(query)
    query = stemWords(query)

    # Restore the operators, again as whole words only.
    query = re.sub(r"\b1and\b", "AND", query)
    query = re.sub(r"\b1or\b", "OR", query)

    # Strip operators and parentheses to get the bare terms.
    # NOTE(review): the final replace(" ", " ") is a no-op as written; it may
    # have been meant to collapse double spaces -- confirm.
    queryWords = (
        query.replace("AND", "")
        .replace("OR", "")
        .replace("(", "")
        .replace(")", "")
        .replace(" ", " ")
    )
    data = parseFreeTextQuery(queryWords)

    ast = bool_expr_ast(query)
    return removeDocsFromData(data, getDocsFromBool(ast))
# Module for testing bool expression parser.
# Reads a line at a time from the stdin and prints the parsed expr to stdout.
#
# Olga Ohrimenko
# 02/2011

import sys

from bool_parser import bool_expr_ast

# readline() returns '' only at end of input (a blank line is '\n'), so the
# loop runs until stdin is exhausted.
line = sys.stdin.readline()
while line != '':
    res = bool_expr_ast(line)
    if isinstance(res, tuple):
        # Print each component of the parsed expression on its own line.
        for part in res:
            print(part)
    else:
        # NOTE(review): the parser appears to be able to return a non-tuple
        # (e.g. a plain string) for input that is not a boolean expression;
        # unpacking such a result into two names would fail. Print it as is.
        print(res)
    line = sys.stdin.readline()
# Module for testing bool expression parser.
# Reads a line at a time from the stdin and prints the parsed expr to stdout.
#
# Olga Ohrimenko
# 02/2011

import sys

from bool_parser import bool_expr_ast

# iter(callable, sentinel) keeps calling readline() until it returns '',
# which happens only at end of input (a blank line is '\n').
for line in iter(sys.stdin.readline, ''):
    res = bool_expr_ast(line)
    # NOTE(review): the parser appears to be able to return a non-tuple
    # (e.g. a plain string) when the line is not a boolean expression;
    # unpacking that into two names would fail, so it is printed whole.
    parts = res if isinstance(res, tuple) else (res,)
    for part in parts:
        print(part)