def extract_daily_facts(user_1_facts, user_2_facts):
    """
    Compute Jaccard-based features over the days two users have in common.

    For every key of ``user_1_facts`` that is also a key of
    ``user_2_facts``, two ``matutils.jaccard`` scores are computed: one over
    the ``items()`` of the two per-day values and one over the per-day
    values themselves.

    NOTE(review): if ``matutils`` is gensim's, ``jaccard`` returns a
    distance (0 means identical) rather than a similarity -- confirm before
    interpreting the features.

    :param user_1_facts: mapping from day to that day's facts for the first
        user; each per-day value must provide ``items()``.
    :param user_2_facts: the same kind of mapping for the second user.
    :return: tuple ``(f1, f2, f3, f4, day_overlap)``. ``f1`` and ``f2`` are
        the means of the two scores over the shared days; ``f3`` and ``f4``
        are their minima, never above the starting value 1.0. All four are
        ``-999`` when no day is shared. ``day_overlap`` is the number of
        shared days divided by the number of distinct days across both
        users, and 0.0 when both mappings are empty.
    """
    f1 = 0
    f2 = 0
    f3 = 1.0
    f4 = 1.0
    num_days = 0
    for day in user_1_facts:
        if day not in user_2_facts:
            continue
        num_days += 1
        facts1 = user_1_facts[day]
        facts2 = user_2_facts[day]
        jaccard1 = matutils.jaccard(facts1.items(), facts2.items())
        jaccard2 = matutils.jaccard(facts1, facts2)
        f1 += jaccard1
        f2 += jaccard2
        f3 = min(f3, jaccard1)
        f4 = min(f4, jaccard2)

    if num_days > 0:
        f1 = float(f1) / num_days
        f2 = float(f2) / num_days
    else:
        # Sentinel meaning "no shared day, features undefined".
        f1 = -999
        f2 = -999
        f3 = -999
        f4 = -999

    # Distinct days across both users (inclusion-exclusion). This is zero
    # only when both mappings are empty; guard it so the ratio does not
    # raise ZeroDivisionError and instead matches the "nothing shared" value.
    total_days = len(user_1_facts) + len(user_2_facts) - num_days
    if total_days > 0:
        day_overlap = float(num_days) / total_days
    else:
        day_overlap = 0.0
    return f1, f2, f3, f4, day_overlap
コード例 #2
0
            vect1 = user_to_topic[u1]
            vect2 = user_to_topic[u2]
            topic_sim = 1 - spatial.distance.cosine(vect1, vect2)

            vec1 = doc2vecmodel.docvecs[u1]
            vec2 = doc2vecmodel.docvecs[u2]

            doc2vec_sim = 1 - spatial.distance.cosine(vec1, vec2)

            bow1 = user_to_words[u1]
            bow2 = user_to_words[u2]

            lda_bow1 = user_to_lda_bow[u1]
            lda_bow2 = user_to_lda_bow[u2]

            hellinger_score = matutils.hellinger(
                lda_bow1, lda_bow2)  # hellinger(lda_bow2, lda_bow1)

            cosine_score = matutils.cossim(lda_bow1, lda_bow2)
            jaccard_word_score = matutils.jaccard(bow1, bow2)
            jaccard_lda_score = matutils.jaccard(lda_bow1, lda_bow2)

            outfile.write("{} {} {} {} {} {} {} {} {} {}\n".format(
                line.strip(), share_facts,
                float(share_facts) / union_facts, tfidf_sim, topic_sim,
                hellinger_score, cosine_score, jaccard_word_score,
                jaccard_lda_score, doc2vec_sim))
    outfile.close()
    i += 2
コード例 #3
0

i = 4

while i < len(sys.argv):
    outfile = open(sys.argv[i + 1], 'w')
    with open(sys.argv[i], 'r') as infile:
        for line in tqdm(infile):
            splits = line.strip().split()
            u1, u2 = splits[0].split(",")

            vect1 = tfidf_days[user_to_time_index[u1]].toarray()[0]
            vect2 = tfidf_days[user_to_time_index[u2]].toarray()[0]
            f1 = 1 - spatial.distance.cosine(vect1, vect2)

            f2 = matutils.jaccard(user_to_time[u1], user_to_time[u2])
            f3 = matutils.jaccard(user_to_time_id[u1], user_to_time_id[u2])

            f4 = compute_euclidean_distance(user_to_hour[u1], user_to_hour[u2])

            f5 = compute_euclidean_datetime_distance(user_to_datetime[u1],
                                                     user_to_datetime[u2])

            f6 = compute_cosine_distance(user_to_hour[u1], user_to_hour[u2])

            f7 = compute_cosine_datetime_distance(user_to_datetime[u1],
                                                  user_to_datetime[u2])

            dist_list, share_time = compute_dist(user_to_hour[u1],
                                                 user_to_hour[u2])