if condition[0] in ifreq: or_docs = or_docs.union(set(elem[0] for elem in ifreq[condition[0]])) else: alldocs = set(freq.keys()) NOT = alldocs if condition[0] not in ifreq else alldocs.difference( set(elem[0] for elem in ifreq[condition[0]])) or_docs = or_docs.union(NOT) if init: docs = docs.union(or_docs) init = 0 else: docs = docs.intersection(or_docs) return docs if __name__ == "__main__": parser = argparse.ArgumentParser(description="Query CACM or WIKI and get boolean results") parser.add_argument("-c", "--collection", default="CACM", help="The collection we want to query from") parser.add_argument("-q", "--query", default="(A|B)&(C|^D)", help="The words we want to search (for several words use '' ). \ Your query must be of the form n-SAT: (A|..|N)&(..)&..&(M|..|Z) \ and can use ^ as a specific NOT to a word.\ Absolutely no space (' ') should be used") args = parser.parse_args() query = args.query words = query.replace("(", "").replace(")", "").replace("&", " ").replace("|", " ").replace("^", "").split(" ") freq, ifreq = loadJsons(args.collection, words) parsed_query = parse_bool_query(query) print find_documents(parsed_query, freq, ifreq)
def main(collection, reverseType, query):
    """Run a projection query and return its result.

    The raw ``query`` is converted with ``searchToQuery``; the converted
    value is then passed, together with ``collection`` and ``reverseType``,
    to ``loadJsons``; finally the converted value and the second item
    returned by ``loadJsons`` are handed to ``projectionQuery``.

    Args:
        collection: Forwarded unchanged as the first argument of
            ``loadJsons``.
        reverseType: Forwarded unchanged as the second argument of
            ``loadJsons``.
        query: Raw query given to ``searchToQuery``.

    Returns:
        Whatever ``projectionQuery`` returns.
    """
    terms = searchToQuery(query)
    # loadJsons yields a two-item result; only the second item is needed
    # by the projection step, so the first is deliberately left unused.
    _unused_first, second_item = loadJsons(collection, reverseType, terms)
    outcome = projectionQuery(terms, second_item)
    return outcome