Example #1
0
                if condition[0] in ifreq:
                    or_docs = or_docs.union(set(elem[0] for elem in ifreq[condition[0]]))
            else:
                alldocs = set(freq.keys())
                NOT = alldocs if condition[0] not in ifreq else alldocs.difference(
                    set(elem[0] for elem in ifreq[condition[0]]))
                or_docs = or_docs.union(NOT)
        if init:
            docs = docs.union(or_docs)
            init = 0
        else:
            docs = docs.intersection(or_docs)
    return docs


if __name__ == "__main__":
    # Command-line entry point: read the collection name and the boolean
    # query, load the data for the words used in the query, then print the
    # result of find_documents.
    parser = argparse.ArgumentParser(description="Query CACM or WIKI and get boolean results")
    parser.add_argument("-c", "--collection", default="CACM", help="The collection we want to query from")
    # The backslash continuations below are inside the string literal, so the
    # leading tabs of the continued lines are part of the help text itself.
    parser.add_argument("-q", "--query", default="(A|B)&(C|^D)",
                        help="The words we want to search (for several words use '' ). \
						Your query must be of the form n-SAT: (A|..|N)&(..)&..&(M|..|Z) \
						and can use ^ as a specific NOT to a word.\
						Absolutely no space (' ') should be used")
    args = parser.parse_args()
    query = args.query

    # Reduce the query to its bare words: parentheses and the '^' marker are
    # removed, '&' and '|' become single spaces, and the result is split on
    # spaces.
    words = query.replace("(", "").replace(")", "").replace("&", " ").replace("|", " ").replace("^", "").split(" ")
    freq, ifreq = loadJsons(args.collection, words)
    parsed_query = parse_bool_query(query)

    # A single-argument print(...) call is valid on Python 2 and Python 3
    # alike; the former print statement is a SyntaxError on Python 3.
    print(find_documents(parsed_query, freq, ifreq))
Example #2
0
def main(collection, reverseType, query):
    """Run `query` through the projection search and return its result.

    The query is first converted by `searchToQuery`; the converted value is
    passed, together with `collection` and `reverseType`, to `loadJsons`.

    Args:
        collection: forwarded unchanged to `loadJsons`.
        reverseType: forwarded unchanged to `loadJsons`.
        query: raw query handed to `searchToQuery`.

    Returns:
        Whatever `projectionQuery` returns for the converted query and the
        second value produced by `loadJsons`.
    """
    terms = searchToQuery(query)
    # loadJsons yields a pair; only its second element is used here.
    _, inverted = loadJsons(collection, reverseType, terms)
    return projectionQuery(terms, inverted)