def get_top(q, docs, mod, km_mod, cl_as):
    """Score the candidate documents for query ``q``.

    Candidates are the documents assigned to the query's predicted cluster,
    followed by every id in ``q["results"]`` that the cluster does not
    already contain. Each candidate is compared with the query via
    ``utils.compute_sim``.

    Args:
        q: Query record. ``q["desc"]`` is looked up in ``mod``;
            ``q["results"]`` is iterated for document ids and indexed by id
            (presumably a mapping of document id -> relevance label;
            confirm against the caller).
        docs: Collection indexed by document id. Each entry is indexed with
            ``"abstract"`` first and ``"title"`` as a fallback.
        mod: Object indexed with a text field to obtain its representation.
        km_mod: Clustering model exposing ``predict``.
        cl_as: Cluster id -> list of document ids. It is NOT modified.

    Returns:
        A list of ``(doc_id, label, sim_score)`` tuples, where ``label`` is
        ``q["results"][doc_id]`` for ids present in ``q["results"]`` and
        ``0`` for every other candidate.
    """
    judged = q["results"]
    q_title = mod[q["desc"]]
    q_vec = np.asarray(make_vector(q_title)).reshape(1, -1)
    q_cluster = km_mod.predict(q_vec)

    # Work on a copy: appending to the list stored in ``cl_as`` would leak
    # this query's result ids into the shared cluster assignment and change
    # the candidates seen by every later query in the same cluster.
    candidate_ids = list(cl_as[q_cluster[0]])
    seen = set(candidate_ids)
    for d_id in judged:
        if d_id not in seen:
            seen.add(d_id)
            candidate_ids.append(d_id)

    res = []
    for doc_id in candidate_ids:
        doc = docs[doc_id]
        try:
            doc_title = mod[doc["abstract"]]
        except Exception:
            # Best-effort fallback to the title when the abstract is missing
            # or cannot be processed; unlike a bare ``except`` this lets
            # KeyboardInterrupt / SystemExit propagate.
            doc_title = mod[doc["title"]]
        sim_score = utils.compute_sim(q_title, doc_title)
        label = judged[doc_id] if doc_id in judged else 0
        res.append((doc_id, label, sim_score))

    # Every id in ``q["results"]`` is already a candidate at this point, so
    # no separate pass over the results is needed.
    return res
def get_top(q, docs, mod, km_mod, cl_as):
    """Score the candidate documents for query ``q``.

    Candidates are the documents assigned to the query's predicted cluster,
    followed by every id in ``q['results']`` that the cluster does not
    already contain. Each candidate is compared with the query via
    ``utils.compute_sim``.

    Args:
        q: Query record. ``q['desc']`` is looked up in ``mod``;
            ``q['results']`` is iterated for document ids and indexed by id
            (presumably a mapping of document id -> relevance label;
            confirm against the caller).
        docs: Collection indexed by document id. Each entry is indexed with
            ``'abstract'`` first and ``'title'`` as a fallback.
        mod: Object indexed with a text field to obtain its representation.
        km_mod: Clustering model exposing ``predict``.
        cl_as: Cluster id -> list of document ids. It is NOT modified.

    Returns:
        A list of ``(doc_id, label, sim_score)`` tuples, where ``label`` is
        ``q['results'][doc_id]`` for ids present in ``q['results']`` and
        ``0`` for every other candidate.
    """
    judged = q['results']
    q_title = mod[q['desc']]
    q_vec = np.asarray(make_vector(q_title)).reshape(1, -1)
    q_cluster = km_mod.predict(q_vec)

    # Work on a copy: appending to the list stored in ``cl_as`` would leak
    # this query's result ids into the shared cluster assignment and change
    # the candidates seen by every later query in the same cluster.
    candidate_ids = list(cl_as[q_cluster[0]])
    seen = set(candidate_ids)
    for d_id in judged:
        if d_id not in seen:
            seen.add(d_id)
            candidate_ids.append(d_id)

    res = []
    for doc_id in candidate_ids:
        doc = docs[doc_id]
        try:
            doc_title = mod[doc['abstract']]
        except Exception:
            # Best-effort fallback to the title when the abstract is missing
            # or cannot be processed; unlike a bare ``except`` this lets
            # KeyboardInterrupt / SystemExit propagate.
            doc_title = mod[doc['title']]
        sim_score = utils.compute_sim(q_title, doc_title)
        label = judged[doc_id] if doc_id in judged else 0
        res.append((doc_id, label, sim_score))

    # Every id in ``q['results']`` is already a candidate at this point, so
    # no separate pass over the results is needed.
    return res