def format_query(q, db):
    """Build completion strings for the last token of the query ``q``.

    The query is tokenized with ``parse_query``; every ``QUOTE`` token is
    broken into one ``QUOTE_S`` token per space-separated word.  The last
    resulting token is looked up in the frequency files under
    ``db.locals["db_path"] + "/frequencies"`` and each match is returned
    prefixed with the text of all preceding tokens.

    Args:
        q: Query string handed to ``parse_query``.
        db: Object exposing ``locals["db_path"]``, the database directory.

    Returns:
        A list of strings, one per match, each made of the preceding tokens
        joined by single spaces followed by the match (wrapped in double
        quotes when the last token came from a quoted phrase).  The list is
        empty when the query yields no tokens or when the last token is
        neither a ``TERM`` nor a ``QUOTE_S``.
    """
    parsed_split = []
    for label, token in parse_query(q):
        if label == "QUOTE":
            # A quote without its closing delimiter gets one appended so the
            # [1:-1] slice strips delimiters rather than the last character.
            if not token.endswith('"'):
                token += '"'
            words = token[1:-1].split(" ")
            parsed_split += [("QUOTE_S", word) for word in words if word]
        else:
            parsed_split.append((label, token))

    # Nothing to complete (empty query, or a quote containing no words).
    # Indexing parsed_split[-1] here used to raise IndexError.
    if not parsed_split:
        return []

    label, token = parsed_split[-1]
    if label not in ("QUOTE_S", "TERM"):
        return []

    # Everything before the last token is echoed back verbatim, with quoted
    # words re-wrapped in double quotes.
    prefix = " ".join(
        '"' + tok + '"' if lab == "QUOTE_S" else tok
        for lab, tok in parsed_split[:-1]
    )
    if prefix:
        prefix = prefix + " "

    freq_dir = db.locals["db_path"] + "/frequencies"

    # token is decoded as UTF-8 and lowercased; the normalized form also has
    # combining marks removed after NFKD decomposition.
    lowered = token.decode("utf-8").lower()
    norm_tok = "".join(
        ch for ch in unicodedata.normalize("NFKD", lowered)
        if not unicodedata.combining(ch)
    ).encode("utf-8")

    matches = word_pattern_search(
        norm_tok, freq_dir + "/normalized_word_frequencies")
    exact_matches = exact_word_pattern_search(
        lowered.encode("utf-8") + '.*', freq_dir + "/word_frequencies")
    # Merge in the matches from the un-normalized lookup, skipping duplicates.
    for m in exact_matches:
        if m not in matches:
            matches.append(m)

    # NOTE(review): highlighter presumably marks the typed part of each
    # match; it receives the length of the encoded normalized token -- confirm.
    matches = highlighter(matches, len(norm_tok))

    output_string = []
    for m in matches:
        if label == "QUOTE_S":
            output_string.append(prefix + '"%s"' % m)
        else:
            output_string.append(prefix + m)
    return output_string
def format_query(q, db):
    """Expand the query ``q`` into a flat list of words.

    The query is tokenized with ``parse_query``; every ``QUOTE`` token is
    broken into one ``QUOTE_S`` token per space-separated word.  Quoted
    words are passed through as they are, while each ``TERM`` token is
    replaced by the results of ``word_pattern_search`` on its lowercased,
    accent-stripped form.  Tokens with any other label contribute nothing.

    Args:
        q: Query string handed to ``parse_query``.
        db: Object exposing ``locals["db_path"]``, the database directory.

    Returns:
        A list holding, in query order, the quoted words and the
        de-duplicated matches of each term.
    """
    parsed_split = []
    for label, token in parse_query(q):
        if label == "QUOTE":
            # A quote without its closing delimiter gets one appended;
            # otherwise the [1:-1] slice would drop the last real character.
            if not token.endswith('"'):
                token += '"'
            words = token[1:-1].split(" ")
            parsed_split += [("QUOTE_S", word) for word in words if word]
        else:
            parsed_split.append((label, token))

    output_string = []
    for label, token in parsed_split:
        if label == "QUOTE_S":
            output_string += token.split()
        elif label == "TERM":
            # token is decoded as UTF-8, lowercased, NFKD-decomposed and
            # stripped of combining marks before the lookup.
            lowered = token.decode("utf-8").lower()
            norm_tok = "".join(
                ch for ch in unicodedata.normalize("NFKD", lowered)
                if not unicodedata.combining(ch)
            ).encode("utf-8")
            matches = word_pattern_search(
                norm_tok,
                db.locals["db_path"] + "/frequencies/normalized_word_frequencies")
            # Keep the first occurrence of each match, preserving order.
            expanded = []
            for m in matches:
                if m not in expanded:
                    expanded.append(m)
            output_string += expanded
    return output_string