Ejemplo n.º 1
0
def query(s, indexFile, btree, normDict, cache):
    """Route the query string ``s`` to the phrase, free-text or boolean handler.

    ``indexFile``, ``btree``, ``normDict`` and ``cache`` are not inspected
    here; they are forwarded unchanged to whichever handler is selected.

    Returns an empty set when ``s`` is empty, when nothing is left after
    stripping and ``preProcessString``, or when the processed string is
    exactly "*". Otherwise returns the selected handler's result.
    """
    # nothing to search for
    if len(s) == 0:
        return set()

    # trim surrounding whitespace, then run the preprocessing step
    trimmed = str.strip(s)
    cleaned = preProcessString(trimmed)

    # preprocessing may leave an empty string; a lone "*" is rejected too
    if len(cleaned) == 0 or cleaned == "*":
        return set()

    # quoted input is answered as a phrase query
    if isWrappedInQuotes(cleaned):
        return phraseQuery(cleaned, indexFile, btree, normDict, cache)

    # a str result from the parser goes to the free-text handler,
    # any other result goes to the boolean handler
    parsed = bool_expr_ast(cleaned)
    handler = freeTextQuery if isinstance(parsed, str) else boolQuery
    return handler(parsed, indexFile, btree, normDict, cache)
Ejemplo n.º 2
0
def query(s):
    """Strip ``s`` and hand it to the phrase, free-text or boolean handler.

    A string that ``isWrappedInQuotes`` accepts goes to ``phraseQuery``.
    Otherwise the string is parsed with ``bool_expr_ast``: a ``str`` result
    goes to ``freeTextQuery``, any other result to ``boolQuery``.
    """
    trimmed = str.strip(s)
    if isWrappedInQuotes(trimmed):
        return phraseQuery(trimmed)

    parsed = bool_expr_ast(trimmed)
    handler = freeTextQuery if isinstance(parsed, str) else boolQuery
    return handler(parsed)
Ejemplo n.º 3
0
def getQueryType(query):
	"""Classify ``query`` and return the matching query-type constant.

	The checks run in this order and the first match wins:

	1. ``query`` starts and ends with a double quote -> PHRASE_QUERY
	2. ``bool_expr_ast(query)`` returns a tuple      -> BOOLEAN_QUERY
	3. scanning left to right, a space is seen first -> FREE_TEXT_QUERY,
	   a "*" is seen first                           -> WILDCARD_QUERY
	4. neither character occurs                      -> ONE_WORD_QUERY
	"""
	# startswith/endswith do not index into the string, so an empty query
	# no longer raises IndexError on this check.
	# NOTE(review): an empty query is now passed on to bool_expr_ast;
	# confirm the parser accepts "".
	if query.startswith("\"") and query.endswith("\""):
		return PHRASE_QUERY
	# isinstance instead of comparing the type's name; tuple subclasses
	# are recognised as well.
	if isinstance(bool_expr_ast(query), tuple):
		return BOOLEAN_QUERY
	# whichever of " " or "*" appears first decides the type
	for ch in query:
		if ch == " ":
			return FREE_TEXT_QUERY
		if ch == "*":
			return WILDCARD_QUERY
	return ONE_WORD_QUERY
Ejemplo n.º 4
0
 def parse_query(self, query):
     """Evaluate ``query`` and return a ``(query_type, result)`` pair.

     ``query_type`` is whatever ``self.determine_query(query)`` returns:

     * 'BQ'  -> ``self.bq`` is given the ``bool_expr_ast`` parse of the query
     * 'OWQ' -> ``self.owq`` is given the query unchanged
     * 'FTQ' -> ``self.ftq`` is given the query unchanged
     * any other value -> ``self.ftq`` is given the query without its
       first and last character
     """
     kind = self.determine_query(query)
     if kind == 'BQ':
         return kind, self.bq(bool_expr_ast(query))
     if kind == 'OWQ':
         return kind, self.owq(query)
     # Both remaining cases end in ftq; only the fallback trims the ends.
     text = query if kind == 'FTQ' else query[1:-1]
     return kind, self.ftq(text)
Ejemplo n.º 5
0
 def parse_query(self, query):
     """Run ``query`` through the handler for its type.

     The type comes from ``self.determine_query(query)`` and is returned
     as the first element of the result pair; the second element is the
     handler's return value. 'BQ' queries are parsed with
     ``bool_expr_ast`` and passed to ``self.bq``; 'OWQ' queries go to
     ``self.owq``; 'FTQ' queries go to ``self.ftq``. For every other type
     the first and last character are dropped before calling ``self.ftq``.
     """
     kind = self.determine_query(query)
     if kind == 'BQ':
         outcome = self.bq(bool_expr_ast(query))
     elif kind == 'OWQ':
         outcome = self.owq(query)
     elif kind == 'FTQ':
         outcome = self.ftq(query)
     else:
         # fallback: strip one character from each end first
         outcome = self.ftq(query[1:-1])
     return kind, outcome
Ejemplo n.º 6
0
def processQuery(query):
    """Classify ``query`` and pass it to the matching process* handler.

    The stemmed terms and a term -> idf mapping are computed first and
    handed to the handler. Dispatch order:

    * the query contains "*": ``processWQ`` when exactly one word remains
      after removing the asterisks, otherwise a blank line is written to
      stdout (wildcard phrase queries are not handled)
    * ``stemmedQuery`` yields no terms: a blank line is written to stdout
    * exactly one word: ``processOWQ`` with the first stemmed term
    * the query contains a double quote: ``processPQ``
    * ``bool_expr_ast(query)`` differs from the query: ``processBQ``
    * otherwise: ``processFTQ``

    Always returns None.
    """
    # Stem and parse once and reuse the results; the original recomputed
    # stemmedQuery(query) and bool_expr_ast(query) a second time.
    stemmed = stemmedQuery(query)
    term_to_idf = {}
    for term in stemmed:
        term_to_idf[term] = calculate_idf_for_term(term)

    # Count the words with the wildcard marker removed.
    term_count = len(re.findall(r'\b\w+\b', query.replace('*', '')))

    if '*' in query:
        # Must be dealing with a wildcard query, since a * exists in it.
        if term_count == 1:
            # Single word wildcard query.
            processWQ(query, term_to_idf)
        else:
            # We're not handling wildcard phrase queries.
            sys.stdout.write("\n")
        return

    if len(stemmed) < 1:
        # Search query has zero terms!
        sys.stdout.write("\n")
        return

    if term_count == 1:
        # Single word query.
        processOWQ(stemmed[0], term_to_idf)
    elif '"' in query:
        processPQ(stemmed, term_to_idf)
    else:
        parsed = bool_expr_ast(query)
        if query != parsed:
            # The parser produced something other than the query itself;
            # must be a boolean query.
            processBQ(parsed, term_to_idf)
        else:
            processFTQ(stemmed, term_to_idf)
Ejemplo n.º 7
0
def parseBooleanQuery(query):
	"""Evaluate a boolean query written with upper-case AND / OR operators.

	The operators are swapped for the placeholders "1and" / "1or" while
	the query is lower-cased and run through ``removeStopWords`` and
	``stemWords``, then restored. The query with operators and
	parentheses removed is passed to ``parseFreeTextQuery``; the restored
	query is parsed with ``bool_expr_ast`` and passed to
	``getDocsFromBool``. Returns the result of
	``removeDocsFromData(data, docs)`` for those two values.
	"""
	# Function-scope import so the block does not rely on a file-level one.
	import re

	# Escape AND/OR operators. Word boundaries keep the substitution from
	# rewriting upper-case words that merely contain those letters
	# (e.g. "ORANGE", "SANDY"), which a plain str.replace would corrupt.
	query = re.sub(r"\bAND\b", "1and", query)
	query = re.sub(r"\bOR\b", "1or", query)
	query = query.lower()
	query = removeStopWords(query)
	query = stemWords(query)
	# Restore the operators in upper case.
	query = query.replace("1and", "AND")
	query = query.replace("1or", "OR")

	# Free-text form of the query: operators and parentheses removed,
	# double spaces collapsed.
	queryWords = query
	for token in ("AND", "OR", "(", ")"):
		queryWords = queryWords.replace(token, "")
	queryWords = queryWords.replace("  ", " ")

	data = parseFreeTextQuery(queryWords)
	query = bool_expr_ast(query)
	return removeDocsFromData(data, getDocsFromBool(query))
Ejemplo n.º 8
0
# Module for testing bool expression parser.
# Reads a line at a time  from the stdin and prints the parsed expr to stdout.
#
# Olga Ohrimenko
# 02/2011

import sys
from bool_parser import bool_expr_ast

# Read stdin line by line until readline() returns '' (end of input),
# parse each line and print the two parts of the result on separate lines.
while True:
  line = sys.stdin.readline()
  if line == '':
    break
  res = bool_expr_ast(line)
  a, b = res
  # Single-argument print(x) prints the same text as "print x".
  print(a)
  print(b)
Ejemplo n.º 9
0
# Module for testing bool expression parser.
# Reads a line at a time  from the stdin and prints the parsed expr to stdout.
#
# Olga Ohrimenko
# 02/2011

import sys
from bool_parser import bool_expr_ast

# Keep calling readline() until it returns the '' sentinel (end of input);
# each line is parsed and the two parts of the result are printed in turn.
for line in iter(sys.stdin.readline, ''):
    a, b = bool_expr_ast(line)
    # Single-argument print(x) prints the same text as "print x".
    print(a)
    print(b)