def runEngine(tokenizer_type, address):
    """Run an interactive console search loop over previously saved index data.

    Loads two objects through ``load_obj`` (named "INVERTED INDEX ..." and
    "CHAMPIONS ..."), then repeatedly reads a query from standard input,
    scores every stored document vector that shares at least one token with
    the query, and prints the titles of the best matches (at most 10).
    Typing ``!q`` ends the loop.

    Args:
        tokenizer_type: String spliced into the names of the saved objects.
        address: String spliced into the names of the saved objects and
            passed to ``fetch_column`` to read the 'title' column.

    Returns:
        None. Results are written to standard output.
    """
    print("RUNNING ENGINE ...")
    inverted_index = load_obj("INVERTED INDEX " + tokenizer_type + "_" + address)
    # Fixed: this line previously read the undefined name `tokenize_type`,
    # which raised NameError before the first prompt was shown.
    tfidfs = load_obj("CHAMPIONS " + tokenizer_type + "_" + address)
    # The title column does not depend on the query, so fetch it once
    # instead of once per query.
    titles = fetch_column(address, 'title')
    max_results = 10

    while True:
        query = input("QUERY :>")
        if query == "!q":
            break

        queryToken = query.split(" ")
        queryw = tf_idf(queryToken, inverted_index, False)
        query_terms = set(queryToken)

        h = Heap()
        # Index-based access is kept on purpose: only len() and [i] are
        # known to be supported by the loaded object.
        for i in range(len(tfidfs)):
            doc_weights = tfidfs[i]
            # Skip documents that share no token with the query.
            if query_terms.isdisjoint(doc_weights.keys()):
                continue
            sim = querySimilarity(queryw, doc_weights)
            if sim != 0:
                h.addnSort([i, sim])

        result = h.getFirstK(max_results)
        # Fewer than max_results documents may match; clamp so that indexing
        # `result` cannot run past its end.
        shown = min(len(result), max_results)
        for i in range(shown):
            # Titles are printed reversed, exactly as before -- presumably to
            # display right-to-left text in a left-to-right console
            # (TODO confirm).
            print(titles[result[i][0]][::-1])
def callback(self):
    """Run the query typed into ``self.text`` and show the best matches.

    Reads the whole content of the text widget, splits it on spaces,
    normalizes the tokens with ``normalize_query``, scores every vector in
    ``self.tfidfs`` that shares at least one token with the query, and
    writes the titles of the best matches (at most 10) into ``self.mylist``.
    Progress information is also printed to standard output.

    Returns:
        None.
    """
    # Read the widget once and reuse the text for both logging and parsing.
    raw_query = self.text.get("1.0", END)
    print("GOT TOKEN ", raw_query)

    queryToken = raw_query.replace("\n", "").split(" ")
    queryToken = normalize_query(queryToken)
    print(queryToken)

    queryw = tf_idf(queryToken, self.inverted_index, False)
    query_terms = set(queryToken)

    h = Heap()
    # Index-based access is kept on purpose: only len() and [i] are known
    # to be supported by self.tfidfs.
    for i in range(len(self.tfidfs)):
        doc_weights = self.tfidfs[i]
        # Skip documents that share no token with the query.
        if query_terms.isdisjoint(doc_weights.keys()):
            continue
        sim = querySimilarity(queryw, doc_weights)
        if sim != 0:
            h.addnSort([i, sim])

    k = 10
    result = h.getFirstK(k)
    # Fewer than k documents may match.
    k = min(len(result), k)
    print(k)

    # Fixed: rows were previously overwritten one by one, so a query with
    # fewer hits than the previous one left stale titles at the bottom of
    # the list. Clear the listbox first, then append the new results.
    self.mylist.delete(0, END)
    for i in range(k):
        title = self.titles[result[i][0]]
        print(title)
        self.mylist.insert(END, title)