Exemple #1
0
def compareAndMerge(grp_id):
    """Merge ``grp_id`` with the most similar other group, if any qualifies.

    Every other id in ``GRP_IDS`` has its keyword list loaded and compared
    with this group's keywords via ``findSimilarity``. A candidate qualifies
    when its similarity is at least ``THRESHOLD``; among qualifying
    candidates the one with the strictly highest similarity wins (the first
    one seen wins ties, and a similarity must exceed 0 to be selected).
    When a winner exists, both groups' data dictionaries are loaded and
    handed to ``mergeTwoGroups`` together with the two keyword lists.

    Args:
        grp_id: id of the group to find a merge partner for.
    """
    pre_work()
    own_keywords = read_list(
        f"data/group_profiles/group_keywords/{grp_id}_group_keywords.npy")

    best_id = None
    best_keywords = None
    best_score = 0

    for other_id in (gid for gid in GRP_IDS if gid != grp_id):
        other_keywords = read_list(
            f"data/group_profiles/group_keywords/{other_id}_group_keywords.npy"
        )
        score = findSimilarity(own_keywords, other_keywords)
        qualifies = score >= THRESHOLD

        # One progress line per comparison, labelled by the threshold outcome.
        if qualifies:
            label = "CompareAndMerge: similarity>=THRESHOLD: "
        else:
            label = "CompareAndMerge: similarity<THRESHOLD: "
        print(label, score, "Group1 id: ", grp_id, "Group2 id: ", other_id)

        if qualifies and score > best_score:
            best_score = score
            best_id = other_id
            best_keywords = other_keywords

    if best_id is None:
        # No candidate reached the threshold: nothing to merge.
        return

    own_data = read_dictionary(
        f"data/group_profiles/group_information/{grp_id}_group_data.npy")
    partner_data = read_dictionary(
        f"data/group_profiles/group_information/{best_id}_group_data.npy")
    mergeTwoGroups(own_data, own_keywords, partner_data, best_keywords)
Exemple #2
0
def groupPersonalisation(results, user_id):
    """Re-score search results against the keywords of the user's group.

    The user's group id is looked up in the mapping stored in
    ``data/user_to_group.npy``; that group's keyword list is then loaded and
    each result is scored with ``findSimilarity`` between the page's
    keywords (from ``get_page_keywords``) and the group keywords.

    Args:
        results: iterable of result entries. Only ``res[0]`` of each entry
            is used; it is passed to ``get_page_keywords`` and echoed back
            in the output pairs.
        user_id: key to look up in the user-to-group mapping.

    Returns:
        The value returned by ``Sort`` for the list of
        ``(res[0], similarity)`` pairs.

    Raises:
        KeyError: if ``user_id`` has no entry in the mapping. A missing
            mapping file is treated as an empty mapping, so it is reported
            the same way.
    """
    # Default to an empty mapping so that a missing file surfaces as the
    # KeyError below instead of an UnboundLocalError.
    user_to_grp = {}
    if os.path.exists("data/user_to_group.npy"):
        user_to_grp = read_dictionary("data/user_to_group.npy")
    grp_id = user_to_grp[user_id]

    # A group without a saved keyword file is scored against an empty
    # keyword list, mirroring how missing page keywords are represented.
    keywords_path = f"data/group_profiles/group_keywords/{grp_id}_group_keywords.npy"
    grp_keywords = []
    if os.path.exists(keywords_path):
        grp_keywords = read_list(keywords_path)

    grp_results = []
    for res in results:
        page_keywords = get_page_keywords(res[0])
        similarity = findSimilarity(page_keywords, grp_keywords)
        grp_results.append((res[0], similarity))
    return Sort(grp_results)
def pre_work(user_id):
    """Load per-user and shared state into module-level globals.

    Each global is only reassigned when its backing file exists; otherwise
    it is left untouched:

    * ``DOCID_TO_CLICKS`` from the user's ``docid_to_clicks`` file,
    * ``URL_TO_DOCIDS`` from ``data/already_visited_urls.npy``,
    * ``USER_KEYWORDS`` from the user's ``user_keywords`` file.

    Args:
        user_id: id used to build the two per-user file names.
    """
    global DOCID_TO_CLICKS, URL_TO_DOCIDS, USER_KEYWORDS

    clicks_file = (
        f"data/user_profiles/docid_to_clicks/{user_id}_docid_to_clicks.npy"
    )
    visited_file = "data/already_visited_urls.npy"
    keywords_file = (
        f"data/user_profiles/user_keywords/{user_id}_user_keywords.npy"
    )

    if os.path.exists(clicks_file):
        DOCID_TO_CLICKS = read_dictionary(clicks_file)

    if os.path.exists(visited_file):
        URL_TO_DOCIDS = read_dictionary(visited_file)

    if os.path.exists(keywords_file):
        USER_KEYWORDS = read_list(keywords_file)
Exemple #4
0
def pre_work():
    """Refresh the module-level ``GRP_IDS`` from ``data/GRP_IDS.npy``.

    When the file does not exist, ``GRP_IDS`` is left untouched.
    """
    global GRP_IDS

    ids_file = "data/GRP_IDS.npy"
    if not os.path.exists(ids_file):
        return
    GRP_IDS = read_list(ids_file)
Exemple #5
0
def get_grp_keywords(grp_id):
    """Load the saved keyword list for a group.

    Args:
        grp_id: id used to build the group's keyword file name.

    Returns:
        The result of ``read_list`` on the group's keyword file, or ``None``
        when that file does not exist.
    """
    keywords_file = (
        f"data/group_profiles/group_keywords/{grp_id}_group_keywords.npy"
    )
    if not os.path.exists(keywords_file):
        return None
    return read_list(keywords_file)
Exemple #6
0
def get_user_keywords(user_id):
    """Load the saved keyword list for a user.

    Args:
        user_id: id used to build the user's keyword file name.

    Returns:
        The result of ``read_list`` on the user's keyword file, or ``None``
        when that file does not exist.
    """
    keywords_file = (
        f"data/user_profiles/user_keywords/{user_id}_user_keywords.npy"
    )
    if not os.path.exists(keywords_file):
        return None
    return read_list(keywords_file)
def get_doc_keywords(doc_id):
    """Load the saved keyword list for a document.

    Args:
        doc_id: id used to build the document's keyword file name.

    Returns:
        The result of ``read_list`` on ``data/doc_pages_keywords/<doc_id>.npy``,
        or a new empty list when that file does not exist.
    """
    source = f"data/doc_pages_keywords/{doc_id}.npy"
    return read_list(source) if os.path.exists(source) else []
Exemple #8
0
def get_page_keywords(docID):
    """Load the saved keyword list for a page.

    Args:
        docID: id used to build the page's keyword file name.

    Returns:
        The result of ``read_list`` on ``data/doc_pages_keywords/<docID>.npy``,
        or a new empty list when that file does not exist.
    """
    keywords_file = f"data/doc_pages_keywords/{docID}.npy"
    if not os.path.exists(keywords_file):
        return []
    return read_list(keywords_file)