Example #1
0
def calculate_rate_average_global(raw_data, friend_data):
    """Compute, per roster user, the variance of mean ratings among strangers.

    For every user in the friends roster, the strangers in the roster are
    ranked by ``CosineSimilarity.get_similarities`` and the first ``f_num``
    entries are kept, where ``f_num`` is the number of friends the user has.
    The mean rating of each selected stranger (zero means are skipped) plus
    the user's own mean rating (if non-zero) are collected, and their
    variance is computed with ``np.var``.

    Args:
        raw_data: rating data handed to ``FriendsModel``.
        friend_data: friendship data handed to ``FriendsModel``.

    Returns:
        dict mapping each roster user id to the variance of the collected
        mean ratings.
    """
    friend_model = FriendsModel(raw_data, friend_data)
    cosine_sim = CosineSimilarity(friend_model)
    the_variance = {}

    for u_id in friend_model.get_friends_roster():
        f_ids = friend_model.get_strangers_in_roster(u_id)
        f_num = len(friend_model.get_friends(u_id))
        sorted_sims = cosine_sim.get_similarities(u_id, f_ids)[:f_num]

        # Use a distinct loop variable: reusing ``u_id`` here would overwrite
        # the roster user and make the lookups below target a stranger.
        t_ids = [t_id for t_id, _sim in sorted_sims]

        mean_rates = friend_model.get_multiple_users_mean_rate(t_ids)
        i_rate = friend_model.get_user_rate_mean(u_id)

        # A mean of 0 is skipped rather than counted as a rating.
        the_means = [
            mean_rates[ky] for ky in mean_rates.keys() if mean_rates[ky] != 0
        ]

        if i_rate != 0:
            # should always be here
            the_means.append(i_rate)

        the_variance[u_id] = np.var(the_means)

    return the_variance
Example #2
0
def get_friends_from_ml100k():
    """Load, or build and cache, the simulated friend lists for MovieLens 100k.

    If the cache file already exists it is unpickled and returned. Otherwise
    a friend list is generated for every user id of the raw model, pickled
    to the cache file (protocol 2) and returned.

    Returns:
        dict mapping each user id to a list of up to 150 friend ids.
    """
    dst_file = "./src/dataset/movie100k/ml100kfriend.dat"
    if path.isfile(dst_file):
        # NOTE(review): pickle.load can execute arbitrary code; the cache
        # file must only ever come from a trusted source.
        with open(dst_file, "rb") as cache:
            return pickle.load(cache)

    raw_model = Model(get_moive100k(True))
    cosine_sim = CosineSimilarity(raw_model)
    friend_data = {}

    for user_id in raw_model.get_user_ids():
        neighbors = cosine_sim.get_similarities(user_id)[:250]
        # Keep only the ids of the (id, similarity) pairs; unlike
        # ``zip(*neighbors)`` this also works for an empty neighbour list.
        user_ids = [neighbor_id for neighbor_id, _sim in neighbors]
        shuffle(user_ids)
        # note:
        # Randomly choose 150 out of 250 neighbors as friends.
        # In such a case, the system is able to (possibly) choose strangers
        # which are in the top-250 similar users, but with a probability
        # slightly smaller than for friends selection.
        friend_data[user_id] = user_ids[:150]

    # Pickle data is binary: write through a binary-mode handle and make
    # sure it is closed (and flushed) before returning.
    with open(dst_file, "wb") as cache:
        pickle.dump(friend_data, cache, protocol=2)
    return friend_data
Example #3
0
def calculate_rate_average_global(raw_data, friend_data):
    """Return the variance of mean ratings around each roster user.

    For each user in the friends roster the strangers in the roster are
    passed to ``CosineSimilarity.get_similarities``; the first ``f_num``
    results are kept (``f_num`` being the user's number of friends). The
    non-zero mean ratings of those strangers, together with the user's own
    mean rating when it is non-zero, are fed to ``np.var``.

    Args:
        raw_data: rating data handed to ``FriendsModel``.
        friend_data: friendship data handed to ``FriendsModel``.

    Returns:
        dict keyed by roster user id whose values are the computed variances.
    """
    friend_model = FriendsModel(raw_data, friend_data)
    cosine_sim = CosineSimilarity(friend_model)
    roster = friend_model.get_friends_roster()
    the_variance = {}

    for u_id in roster:
        f_ids = friend_model.get_strangers_in_roster(u_id)
        f_num = len(friend_model.get_friends(u_id))
        sorted_sims = cosine_sim.get_similarities(u_id, f_ids)[:f_num]

        # The stranger ids get their own name so the roster user ``u_id``
        # is still intact for the own-mean lookup and the result key.
        t_ids = []
        for stranger_id, _sim in sorted_sims:
            t_ids.append(stranger_id)

        mean_rates = friend_model.get_multiple_users_mean_rate(t_ids)
        i_rate = friend_model.get_user_rate_mean(u_id)

        the_means = []
        for ky in mean_rates.keys():
            rate = mean_rates[ky]
            # Zero means are left out of the variance.
            if rate != 0:
                the_means.append(rate)

        if i_rate != 0:
            # should always be here
            the_means.append(i_rate)

        the_variance[u_id] = np.var(the_means)

    return the_variance
Example #4
0
def get_friends_from_ml100k():
    """Return simulated friend lists for MovieLens 100k, using a disk cache.

    When the cache file exists, its unpickled content is returned directly.
    Otherwise friend lists are generated for every user id of the raw
    model, stored in the cache file with pickle protocol 2, and returned.

    Returns:
        dict mapping each user id to a list of up to 150 friend ids.
    """
    dst_file = "./src/dataset/movie100k/ml100kfriend.dat"

    if path.isfile(dst_file):
        # NOTE(review): unpickling runs arbitrary code from the file, so the
        # cache must not be replaced by untrusted data.
        with open(dst_file, "rb") as src:
            friend_data = pickle.load(src)
        return friend_data

    raw_data = get_moive100k(True)
    raw_model = Model(raw_data)
    cosine_sim = CosineSimilarity(raw_model)
    friend_data = {}

    for user_id in raw_model.get_user_ids():
        neighbors = cosine_sim.get_similarities(user_id)[:250]
        # Take the id out of each (id, similarity) pair. A comprehension
        # tolerates an empty neighbour list, which ``zip(*neighbors)``
        # unpacking does not.
        user_ids = [pair[0] for pair in neighbors]
        shuffle(user_ids)
        # note:
        # Randomly choose 150 out of 250 neighbors as friends.
        # In such a case, the system is able to (possibly) choose strangers
        # which are in the top-250 similar users, but with a probability
        # slightly smaller than for friends selection.
        friend_data[user_id] = user_ids[:150]

    # Binary mode is required for pickle output; the context manager also
    # guarantees the handle is flushed and closed.
    with open(dst_file, "wb") as dst:
        pickle.dump(friend_data, dst, protocol=2)
    return friend_data
Example #5
0
def calculate_cosine_similarity_global(raw_data, friend_data):
    """Collect, per roster user, similarities to strangers in the roster.

    For each user the result list returned by
    ``CosineSimilarity.get_similarities`` for the roster strangers is cut
    to as many entries as the user has friends.

    Args:
        raw_data: rating data handed to ``FriendsModel``.
        friend_data: friendship data handed to ``FriendsModel``.

    Returns:
        dict mapping roster user id to the truncated similarity list
        (presumably ordered by similarity -- confirm in CosineSimilarity).
    """
    model = FriendsModel(raw_data, friend_data)
    similarity = CosineSimilarity(model)

    stranger_sims = {}
    for user in model.get_friends_roster():
        limit = len(model.get_friends(user))
        candidates = model.get_strangers_in_roster(user)
        stranger_sims[user] = similarity.get_similarities(
            user, candidates)[:limit]
    return stranger_sims
Example #6
0
def calculate_cosine_similarity_friends(raw_data, friend_data):
    """Collect, per roster user, the similarities to that user's friends.

    Args:
        raw_data: rating data handed to ``FriendsModel``.
        friend_data: friendship data handed to ``FriendsModel``.

    Returns:
        dict mapping each roster user id to whatever
        ``CosineSimilarity.get_similarities`` returns for the user's friends.
    """
    model = FriendsModel(raw_data, friend_data)
    similarity = CosineSimilarity(model)
    return {
        user: similarity.get_similarities(user, model.get_friends(user))
        for user in model.get_friends_roster()
    }
Example #7
0
def calculate_cosine_similarity_global(raw_data, friend_data):
    """Return, per roster user, the leading similarities to roster strangers.

    For each user in the friends roster, the strangers in the roster are
    passed to ``CosineSimilarity.get_similarities`` and the first ``f_num``
    results are kept, where ``f_num`` is the user's number of friends.

    Args:
        raw_data: rating data handed to ``FriendsModel``.
        friend_data: friendship data handed to ``FriendsModel``.

    Returns:
        dict mapping roster user id to the truncated similarity list.
    """
    friend_model = FriendsModel(raw_data, friend_data)
    cosine_sim = CosineSimilarity(friend_model)
    c_sims = {}

    for u_id in friend_model.get_friends_roster():
        # Indented with spaces only: mixing a tab into this line made the
        # loop body inconsistent (a TabError on Python 3).
        f_num = len(friend_model.get_friends(u_id))
        target_ids = friend_model.get_strangers_in_roster(u_id)
        sorted_sims = cosine_sim.get_similarities(u_id, target_ids)
        c_sims[u_id] = sorted_sims[:f_num]
    return c_sims
Example #8
0
def calculate_cosine_similarity_friends(raw_data, friend_data):
    """Build a per-user table of similarities between a user and its friends.

    Args:
        raw_data: rating data handed to ``FriendsModel``.
        friend_data: friendship data handed to ``FriendsModel``.

    Returns:
        dict keyed by roster user id; each value is the result of
        ``CosineSimilarity.get_similarities`` for that user's friends.
    """
    model = FriendsModel(raw_data, friend_data)
    similarity = CosineSimilarity(model)

    friend_sims = {}
    for user in model.get_friends_roster():
        friends_of_user = model.get_friends(user)
        friend_sims[user] = similarity.get_similarities(user, friends_of_user)
    return friend_sims
Example #9
0
def Generate_Simulating_Data_on_MovieLens(user_id, f_num, t_num):
    """Draw random friends and strangers for a MovieLens 100k user.

    Loads the friend data and the raw MovieLens data, builds the similarity
    model and asks ``Friends_Strangers`` for random friends and strangers of
    ``user_id``.

    Args:
        user_id: id of the user to sample for.
        f_num: passed to ``Friends_Strangers`` (presumably the friend count).
        t_num: passed to ``Friends_Strangers`` (presumably the stranger count).

    NOTE(review): the sampled friends and strangers are not returned or
    stored by the visible code -- confirm whether this is intentional.
    """
    friends_map = get_friends_from_ml100k()
    model = FriendsModel(get_moive100k(), friends_map)
    sampler = Friends_Strangers(CosineSimilarity(model), f_num, t_num)
    sampled_friends = sampler.get_rand_friends(user_id)
    sampled_strangers = sampler.get_rand_strangers(user_id)
Example #10
0
def generate_simulating_data(raw_data, friend_data, user_id, f_num, t_num,
                             filelocation):
    """Write simulated friend/stranger data for one user to three files.

    Random friends and strangers of ``user_id`` are drawn through
    ``Friends_Strangers``. Three files are written below ``filelocation``
    via ``save_list_to_file``:

    * ``similarity_<user_id>_<f_num>.dat`` -- friend similarities multiplied
      by 8 and cast to int32,
    * ``friend_<user_id>_<f_num>.dat`` -- one dense rating vector per friend,
      multiplied by 16 and cast to int32,
    * ``stranger_<user_id>_<t_num>.dat`` -- the same for every stranger.

    Args:
        raw_data: rating data handed to ``FriendsModel``.
        friend_data: friendship data; must contain ``user_id``.
        user_id: user to generate data for.
        f_num: passed to ``Friends_Strangers``; also used in file names.
        t_num: passed to ``Friends_Strangers``; also used in file names.
        filelocation: output directory prefix, created if missing. File
            names are appended by plain string concatenation, so it must
            end with a path separator.

    Returns:
        None. Prints a message and returns early when ``user_id`` is not
        in ``friend_data``.
    """
    if user_id not in friend_data:
        # Single-argument call form prints the same text on Python 2 and 3.
        print("invalid user_id!")
        return

    if not path.exists(filelocation):
        makedirs(filelocation)

    friend_model = FriendsModel(raw_data, friend_data)
    cosine_sim = CosineSimilarity(friend_model)
    fs = Friends_Strangers(cosine_sim, f_num, t_num)
    friends = fs.get_rand_friends(user_id)
    strangers = fs.get_rand_strangers(user_id)
    f_ids, f_sims = zip(*friends)
    t_ids, _ = zip(*strangers)

    # Scale to integers before saving (original note: "for HE").
    f_sims = np.dot(f_sims, 8).astype(np.int32).tolist()
    f1 = filelocation + "similarity_" + str(user_id) + "_" + str(
        f_num) + ".dat"
    save_list_to_file(f1, [f_sims])  # friends similarity

    diskfriend = []
    for f_id in f_ids:
        vec = friend_model.get_dense_user_vector(f_id)
        vec = np.dot(vec, 16).astype(np.int32).tolist()  # for HE
        diskfriend.append(vec)
    f2 = filelocation + "friend_" + str(user_id) + "_" + str(f_num) + ".dat"
    save_list_to_file(f2, diskfriend)  # friends rating data

    diskstranger = []
    for t_id in t_ids:
        # Look up the stranger's own vector; using the stale ``f_id`` from
        # the loop above would store the last friend's vector repeatedly.
        vec = friend_model.get_dense_user_vector(t_id)
        vec = np.dot(vec, 16).astype(np.int32).tolist()  # for HE
        diskstranger.append(vec)
    f3 = filelocation + "stranger_" + str(user_id) + "_" + str(t_num) + ".dat"
    save_list_to_file(f3, diskstranger)  # strangers rating data
Example #11
0
def get_similarity_matrix(data_set, friend_data):
    """Build a ``CosineSimilarity`` over a ``FriendsModel`` of the inputs.

    Args:
        data_set: rating data handed to ``FriendsModel``.
        friend_data: friendship data handed to ``FriendsModel``.

    Returns:
        The ``CosineSimilarity`` instance wrapping the new model.
    """
    return CosineSimilarity(FriendsModel(data_set, friend_data))