def compareAndMerge(grp_id):
    """Find the group most similar to *grp_id* and merge the two.

    Scans every other group in GRP_IDS, computes keyword similarity, and —
    among the groups whose similarity reaches THRESHOLD — merges grp_id with
    the single best match via mergeTwoGroups. No-op when no group qualifies.
    """
    pre_work()
    base_keywords = read_list(
        f"data/group_profiles/group_keywords/{grp_id}_group_keywords.npy")

    best_id = None
    best_keywords = None
    best_score = 0

    for other_id in GRP_IDS:
        if other_id == grp_id:
            continue
        other_keywords = read_list(
            f"data/group_profiles/group_keywords/{other_id}_group_keywords.npy"
        )
        score = findSimilarity(base_keywords, other_keywords)
        if score >= THRESHOLD:
            print("CompareAndMerge: similarity>=THRESHOLD: ", score,
                  "Group1 id: ", grp_id, "Group2 id: ", other_id)
            # Track only the single highest-scoring candidate.
            if score > best_score:
                best_score = score
                best_id = other_id
                best_keywords = other_keywords
        else:
            print("CompareAndMerge: similarity<THRESHOLD: ", score,
                  "Group1 id: ", grp_id, "Group2 id: ", other_id)

    if best_id is not None:
        group1_data = read_dictionary(
            f"data/group_profiles/group_information/{grp_id}_group_data.npy")
        group2_data = read_dictionary(
            f"data/group_profiles/group_information/{best_id}_group_data.npy")
        mergeTwoGroups(group1_data, base_keywords, group2_data, best_keywords)
def groupPersonalisation(results, user_id):
    """Re-rank *results* by similarity to the user's group keyword profile.

    results: iterable of tuples whose first element is a doc id
             (presumably (doc_id, score) pairs — TODO confirm with caller).
    Returns a list of (doc_id, similarity) pairs sorted by Sort(), or
    None when no group data is available for this user.
    """
    if not os.path.exists("data/user_to_group.npy"):
        return None
    user_to_grp = read_dictionary("data/user_to_group.npy")
    # Fix: the original did user_to_grp[user_id] and raised an uncaught
    # KeyError for users not yet assigned to a group; bail out instead.
    if user_id not in user_to_grp:
        return None
    grp_id = user_to_grp[user_id]

    keywords_path = f"data/group_profiles/group_keywords/{grp_id}_group_keywords.npy"
    if not os.path.exists(keywords_path):
        return None
    grp_keywords = read_list(keywords_path)

    # Score each result's page keywords against the group profile.
    grp_results = [
        (res[0], findSimilarity(get_page_keywords(res[0]), grp_keywords))
        for res in results
    ]
    return Sort(grp_results)
def pre_work(user_id):
    """Load this user's cached profile data into module-level globals.

    Populates DOCID_TO_CLICKS, URL_TO_DOCIDS and USER_KEYWORDS from disk,
    leaving each untouched when its backing file does not exist.

    NOTE(review): a zero-argument pre_work() also exists in this file; the
    later definition shadows this one at module level — confirm intended.
    """
    global DOCID_TO_CLICKS, URL_TO_DOCIDS, USER_KEYWORDS

    clicks_path = f"data/user_profiles/docid_to_clicks/{user_id}_docid_to_clicks.npy"
    if os.path.exists(clicks_path):
        DOCID_TO_CLICKS = read_dictionary(clicks_path)

    if os.path.exists("data/already_visited_urls.npy"):
        URL_TO_DOCIDS = read_dictionary("data/already_visited_urls.npy")

    keywords_path = f"data/user_profiles/user_keywords/{user_id}_user_keywords.npy"
    if os.path.exists(keywords_path):
        USER_KEYWORDS = read_list(keywords_path)
def pre_work():
    """Refresh the module-level GRP_IDS list from its cache file, if present."""
    global GRP_IDS
    ids_path = "data/GRP_IDS.npy"
    if os.path.exists(ids_path):
        GRP_IDS = read_list(ids_path)
def get_grp_keywords(grp_id):
    """Return the stored keyword list for group *grp_id*.

    Returns an empty list when no keyword file exists for the group.
    (Previously this fell through and implicitly returned None; normalized
    to [] for consistency with get_doc_keywords / get_page_keywords —
    both values are falsy, so truthiness checks are unaffected.)
    """
    path = f"data/group_profiles/group_keywords/{grp_id}_group_keywords.npy"
    if os.path.exists(path):
        return read_list(path)
    return []
def get_user_keywords(user_id):
    """Return the stored keyword list for user *user_id*.

    Returns an empty list when no keyword file exists for the user.
    (Previously this fell through and implicitly returned None; normalized
    to [] for consistency with get_doc_keywords / get_page_keywords —
    both values are falsy, so truthiness checks are unaffected.)
    """
    path = f"data/user_profiles/user_keywords/{user_id}_user_keywords.npy"
    if os.path.exists(path):
        return read_list(path)
    return []
def get_doc_keywords(doc_id):
    """Return the stored keyword list for document *doc_id*, or [] if none is cached."""
    path = f"data/doc_pages_keywords/{doc_id}.npy"
    return read_list(path) if os.path.exists(path) else []
def get_page_keywords(docID):
    """Return the stored keyword list for page *docID*, or [] if none is cached.

    NOTE(review): functionally identical to get_doc_keywords — presumably one
    alias could be dropped; verify callers before consolidating.
    """
    path = f"data/doc_pages_keywords/{docID}.npy"
    return read_list(path) if os.path.exists(path) else []