def parse_query(raw_query):
    """Turn a query string entered by the user into a list of unique terms.

    Args:
        raw_query: The query text as typed by the user.

    Returns:
        A list of cleaned-up terms (as produced by ``cleanup_term``), in
        order of first appearance, without duplicates and without empty
        terms.
    """
    logging.debug("Parsing query")
    query = []
    # split() with no separator splits on any run of whitespace, so
    # leading/trailing/repeated spaces, tabs and newlines never produce
    # empty tokens (split(" ") would).
    for raw_term in raw_query.split():
        term = cleanup_term(raw_term)
        # Skip tokens that clean up to nothing: an empty term carries no
        # search meaning and build_index does not index empty terms either.
        if term and term not in query:
            query.append(term)
    return query
def build_index(index, doclist):
    """Add the terms of each document in ``doclist`` to ``index``.

    Args:
        index: Existing index, a dictionary whose keys are cleaned-up terms
            and whose values are lists of the names of documents containing
            the term. It is updated in place.
        doclist: Document names to add; each is passed to ``load_document``.

    Returns:
        The same ``index`` object, updated, so callers can write either
        ``build_index(index, docs)`` or ``index = build_index(index, docs)``.
    """
    logging.debug("Building index")
    for docname in doclist:
        doctext = load_document(docname)
        for raw_term in split_document(doctext):
            term = cleanup_term(raw_term)
            # A token that cleans up to nothing must not be indexed;
            # previously it fell through to the "new term" branch and
            # created/overwrote an entry under the empty key.
            if not term:
                continue
            entry = index.get(term)
            if not entry:
                # First occurrence of the term (or a missing/empty entry).
                index[term] = [docname]
            elif docname not in entry:
                # Record each document at most once per term.
                entry.append(docname)
    return index