Example #1
0
def runEngine(tokenizer_type, address):
    """Run an interactive console loop that scores documents against queries.

    Two stored objects are loaded once through ``load_obj``; the function
    then prompts for queries until ``!q`` is entered. For each query, every
    document that shares at least one term with the query is scored with
    ``querySimilarity``, non-zero scores are pushed into a ``Heap``, and the
    titles of up to ten entries returned by ``Heap.getFirstK`` are printed.

    Args:
        tokenizer_type: Label used to build the names of the stored objects
            ("INVERTED INDEX <tokenizer_type>_<address>" and
            "CHAMPIONS <tokenizer_type>_<address>").
        address: Second half of the stored-object names; also passed to
            ``fetch_column`` to read the ``'title'`` column.
    """
    print("RUNNING ENGINE ...")
    # Both stored objects share the same "<tokenizer_type>_<address>" suffix.
    suffix = tokenizer_type + "_" + address
    inverted_index = load_obj("INVERTED INDEX " + suffix)
    tfidfs = load_obj("CHAMPIONS " + suffix)

    max_results = 10
    # Fetched on first use and then reused, like the index and weights above
    # which are also loaded only once per session.
    titles = None

    while True:
        query = input("QUERY :>")
        if query == "!q":
            break

        query_tokens = query.split(" ")
        query_weights = tf_idf(query_tokens, inverted_index, False)
        # Built once per query instead of once per document.
        query_terms = set(query_tokens)

        ranking = Heap()
        # Index-based access on purpose: only len(tfidfs) and tfidfs[i] are
        # known to be supported by the loaded object.
        for doc_id in range(len(tfidfs)):
            doc_weights = tfidfs[doc_id]
            # Skip documents that have no term in common with the query.
            if query_terms.isdisjoint(doc_weights.keys()):
                continue
            sim = querySimilarity(query_weights, doc_weights)
            if sim != 0:
                ranking.addnSort([doc_id, sim])

        result = ranking.getFirstK(max_results)
        if titles is None:
            titles = fetch_column(address, 'title')

        # Fewer than max_results documents may have matched; never index
        # past the end of the result list.
        shown = min(len(result), max_results)
        for rank in range(shown):
            # Titles are printed reversed, presumably so right-to-left text
            # reads correctly in a left-to-right console -- TODO confirm.
            print(titles[result[rank][0]][::-1])
Example #2
0
    def callback(self):
        """Score the query typed into ``self.text`` and list matching titles.

        Reads the content of ``self.text``, strips newlines, splits it on
        spaces and passes the tokens through ``normalize_query``. Every entry
        of ``self.tfidfs`` that shares at least one term with the query is
        scored with ``querySimilarity``; non-zero scores are pushed into a
        ``Heap``. Up to ten entries returned by ``Heap.getFirstK`` are then
        printed and written into ``self.mylist``, one title per row.
        """
        # Read the widget once and reuse the value for logging and parsing.
        raw_query = self.text.get("1.0", END)
        print("GOT TOKEN ", raw_query)
        # NOTE(review): newlines are removed, not replaced by a space, so
        # words on consecutive lines are joined -- confirm this is intended.
        query_tokens = normalize_query(raw_query.replace("\n", "").split(" "))
        print(query_tokens)
        query_weights = tf_idf(query_tokens, self.inverted_index, False)
        # Built once per query instead of once per document.
        query_terms = set(query_tokens)

        ranking = Heap()

        # Index-based access on purpose: only len() and [i] are known to be
        # supported by self.tfidfs.
        for doc_id in range(len(self.tfidfs)):
            doc_weights = self.tfidfs[doc_id]
            # Skip documents that have no term in common with the query.
            if query_terms.isdisjoint(doc_weights.keys()):
                continue
            sim = querySimilarity(query_weights, doc_weights)
            if sim != 0:
                ranking.addnSort([doc_id, sim])

        max_results = 10
        result = ranking.getFirstK(max_results)
        # Fewer than max_results documents may have matched.
        shown = min(len(result), max_results)
        print(shown)
        for row in range(shown):
            title = self.titles[result[row][0]]
            print(title)
            # Replace row `row` in place: drop the old entry, insert the new.
            # NOTE(review): rows at index >= shown are left untouched, so
            # entries from an earlier, longer result list may remain visible
            # -- confirm whether they should be cleared first.
            self.mylist.delete(row)
            self.mylist.insert(row, title)