def calculate_rate_average_global(raw_data, friend_data):
    """Return the variance of mean ratings around each roster user.

    For every user in the friends roster, the strangers reported by
    ``get_strangers_in_roster`` are scored with ``CosineSimilarity`` and the
    leading entries are kept, as many as the user has friends (the result of
    ``get_similarities`` is presumably sorted by decreasing similarity --
    confirm).  The non-zero mean ratings of those strangers, plus the user's
    own non-zero mean rating, are passed to ``np.var``.

    Note that, despite the function name, the value stored per user is a
    variance, not an average.

    Args:
        raw_data: Rating data handed to ``FriendsModel``.
        friend_data: Friendship data handed to ``FriendsModel``.

    Returns:
        dict mapping each roster user id to the ``np.var`` of the collected
        mean ratings.
    """
    friend_model = FriendsModel(raw_data, friend_data)
    cosine_sim = CosineSimilarity(friend_model)
    the_variance = {}
    for u_id in friend_model.get_friends_roster():
        stranger_ids = friend_model.get_strangers_in_roster(u_id)
        f_num = len(friend_model.get_friends(u_id))
        sorted_sims = cosine_sim.get_similarities(u_id, stranger_ids)[:f_num]
        # The ids extracted here must not reuse the name ``u_id``: doing so
        # overwrites the roster user with the last stranger, and both the
        # own-mean lookup and the result key below would then refer to the
        # wrong user.
        t_ids = [t_id for t_id, _sim in sorted_sims]
        mean_rates = friend_model.get_multiple_users_mean_rate(t_ids)
        i_rate = friend_model.get_user_rate_mean(u_id)
        # Means equal to 0 are left out (presumably "no ratings" -- confirm).
        the_means = [rate for rate in mean_rates.values() if rate != 0]
        if i_rate != 0:  # should always be the case
            the_means.append(i_rate)
        the_variance[u_id] = np.var(the_means)
    return the_variance
def get_friends_from_ml100k():
    """Return the simulated friend lists, building and caching them if needed.

    If the cache file already exists it is unpickled and returned as is.
    Otherwise, for every user id reported by the ``Model`` built from
    ``get_moive100k(True)``, the first 250 entries of
    ``CosineSimilarity.get_similarities`` are shuffled and the first 150 of
    them become that user's friends.  The result is pickled to the cache
    file before being returned.

    Returns:
        dict mapping each user id to a list of at most 150 user ids.
    """
    dst_file = "./src/dataset/movie100k/ml100kfriend.dat"
    if path.isfile(dst_file):
        # NOTE: unpickling is only safe for trusted files; this cache is
        # expected to be one written by this function.
        with open(dst_file, "rb") as cache:
            return pickle.load(cache)

    raw_data = get_moive100k(True)
    raw_model = Model(raw_data)
    cosine_sim = CosineSimilarity(raw_model)

    friend_data = {}
    for user_id in raw_model.get_user_ids():
        neighbors = cosine_sim.get_similarities(user_id)[:250]
        # A comprehension (rather than ``zip(*neighbors)``) also copes with a
        # user that has no neighbors: the friend list is then simply empty.
        user_ids = [neighbor_id for neighbor_id, _sim in neighbors]
        shuffle(user_ids)
        # note:
        # Randomly choose 150 out of 250 neighbors as friends.
        # In such a case, the system is able to (possibly) choose strangers
        # which are in the top-250 similar users, but with a probability
        # slightly smaller than for friends selection.
        friend_data[user_id] = user_ids[:150]

    # Pickle protocol 2 is a binary format, so the file must be opened in
    # binary mode (matching the "rb" used when reading it back); the ``with``
    # block guarantees the data is flushed and the handle closed.
    with open(dst_file, "wb") as cache:
        pickle.dump(friend_data, cache, protocol=2)
    return friend_data
def calculate_rate_average_global(raw_data, friend_data):
    """Return the variance of mean ratings around each roster user.

    For every user in the friends roster, the strangers reported by
    ``get_strangers_in_roster`` are scored with ``CosineSimilarity`` and the
    leading entries are kept, as many as the user has friends (the result of
    ``get_similarities`` is presumably sorted by decreasing similarity --
    confirm).  The non-zero mean ratings of those strangers, plus the user's
    own non-zero mean rating, are passed to ``np.var``.

    Note that, despite the function name, the value stored per user is a
    variance, not an average.

    Args:
        raw_data: Rating data handed to ``FriendsModel``.
        friend_data: Friendship data handed to ``FriendsModel``.

    Returns:
        dict mapping each roster user id to the ``np.var`` of the collected
        mean ratings.
    """
    friend_model = FriendsModel(raw_data, friend_data)
    cosine_sim = CosineSimilarity(friend_model)
    the_variance = {}
    for u_id in friend_model.get_friends_roster():
        stranger_ids = friend_model.get_strangers_in_roster(u_id)
        f_num = len(friend_model.get_friends(u_id))
        sorted_sims = cosine_sim.get_similarities(u_id, stranger_ids)[:f_num]
        # The ids extracted here must not reuse the name ``u_id``: doing so
        # overwrites the roster user with the last stranger, and both the
        # own-mean lookup and the result key below would then refer to the
        # wrong user.
        t_ids = [t_id for t_id, _sim in sorted_sims]
        mean_rates = friend_model.get_multiple_users_mean_rate(t_ids)
        i_rate = friend_model.get_user_rate_mean(u_id)
        # Means equal to 0 are left out (presumably "no ratings" -- confirm).
        the_means = [rate for rate in mean_rates.values() if rate != 0]
        if i_rate != 0:  # should always be the case
            the_means.append(i_rate)
        the_variance[u_id] = np.var(the_means)
    return the_variance
def get_friends_from_ml100k():
    """Return the simulated friend lists, building and caching them if needed.

    If the cache file already exists it is unpickled and returned as is.
    Otherwise, for every user id reported by the ``Model`` built from
    ``get_moive100k(True)``, the first 250 entries of
    ``CosineSimilarity.get_similarities`` are shuffled and the first 150 of
    them become that user's friends.  The result is pickled to the cache
    file before being returned.

    Returns:
        dict mapping each user id to a list of at most 150 user ids.
    """
    dst_file = "./src/dataset/movie100k/ml100kfriend.dat"
    if path.isfile(dst_file):
        # NOTE: unpickling is only safe for trusted files; this cache is
        # expected to be one written by this function.
        with open(dst_file, "rb") as cache:
            return pickle.load(cache)

    raw_data = get_moive100k(True)
    raw_model = Model(raw_data)
    cosine_sim = CosineSimilarity(raw_model)

    friend_data = {}
    for user_id in raw_model.get_user_ids():
        neighbors = cosine_sim.get_similarities(user_id)[:250]
        # A comprehension (rather than ``zip(*neighbors)``) also copes with a
        # user that has no neighbors: the friend list is then simply empty.
        user_ids = [neighbor_id for neighbor_id, _sim in neighbors]
        shuffle(user_ids)
        # note:
        # Randomly choose 150 out of 250 neighbors as friends.
        # In such a case, the system is able to (possibly) choose strangers
        # which are in the top-250 similar users, but with a probability
        # slightly smaller than for friends selection.
        friend_data[user_id] = user_ids[:150]

    # Pickle protocol 2 is a binary format, so the file must be opened in
    # binary mode (matching the "rb" used when reading it back); the ``with``
    # block guarantees the data is flushed and the handle closed.
    with open(dst_file, "wb") as cache:
        pickle.dump(friend_data, cache, protocol=2)
    return friend_data
def calculate_cosine_similarity_global(raw_data, friend_data):
    """Collect the leading stranger similarities of every roster user.

    Each roster user is scored against the strangers reported by
    ``get_strangers_in_roster``; the result of ``get_similarities`` is cut to
    as many entries as the user has friends.

    Args:
        raw_data: Rating data handed to ``FriendsModel``.
        friend_data: Friendship data handed to ``FriendsModel``.

    Returns:
        dict mapping each roster user id to the truncated similarity result.
    """
    network = FriendsModel(raw_data, friend_data)
    scorer = CosineSimilarity(network)

    def leading_strangers(uid):
        # Friend count first, then strangers, then the scoring call.
        limit = len(network.get_friends(uid))
        candidates = network.get_strangers_in_roster(uid)
        return scorer.get_similarities(uid, candidates)[:limit]

    per_user = {}
    for uid in network.get_friends_roster():
        per_user[uid] = leading_strangers(uid)
    return per_user
def calculate_cosine_similarity_friends(raw_data, friend_data):
    """Score every roster user against that user's own friends.

    Args:
        raw_data: Rating data handed to ``FriendsModel``.
        friend_data: Friendship data handed to ``FriendsModel``.

    Returns:
        dict mapping each roster user id to whatever
        ``CosineSimilarity.get_similarities`` returns for that user and the
        ids from ``get_friends``.
    """
    network = FriendsModel(raw_data, friend_data)
    scorer = CosineSimilarity(network)
    return {
        uid: scorer.get_similarities(uid, network.get_friends(uid))
        for uid in network.get_friends_roster()
    }
def calculate_cosine_similarity_global(raw_data, friend_data):
    """Map each roster user to similarities with an equal number of strangers.

    For every user in the friends roster, ``get_similarities`` is evaluated
    against the ids from ``get_strangers_in_roster`` and the result is sliced
    down to the size of the user's friend list.

    Args:
        raw_data: Rating data handed to ``FriendsModel``.
        friend_data: Friendship data handed to ``FriendsModel``.

    Returns:
        dict keyed by roster user id holding the sliced similarity result.
    """
    model = FriendsModel(raw_data, friend_data)
    similarity = CosineSimilarity(model)
    result = {}
    for user in model.get_friends_roster():
        friend_count = len(model.get_friends(user))
        strangers = model.get_strangers_in_roster(user)
        ranked = similarity.get_similarities(user, strangers)
        result[user] = ranked[:friend_count]
    return result
def calculate_cosine_similarity_friends(raw_data, friend_data):
    """Compute the similarities between each roster user and their friends.

    Args:
        raw_data: Rating data handed to ``FriendsModel``.
        friend_data: Friendship data handed to ``FriendsModel``.

    Returns:
        dict keyed by roster user id; each value is the untruncated result of
        ``CosineSimilarity.get_similarities`` for that user's friend ids.
    """
    model = FriendsModel(raw_data, friend_data)
    similarity = CosineSimilarity(model)
    by_user = {}
    for user in model.get_friends_roster():
        by_user[user] = similarity.get_similarities(
            user, model.get_friends(user))
    return by_user
def Generate_Simulating_Data_on_MovieLens(user_id, f_num, t_num):
    """Sample random friends and strangers for ``user_id``.

    The friend lists come from ``get_friends_from_ml100k`` and the ratings
    from ``get_moive100k``; sampling is delegated to ``Friends_Strangers``.

    Args:
        user_id: User to sample for.
        f_num: Passed to ``Friends_Strangers`` (presumably the number of
            friends to draw -- confirm).
        t_num: Passed to ``Friends_Strangers`` (presumably the number of
            strangers to draw -- confirm).

    Returns:
        tuple ``(friends, strangers)`` holding the results of
        ``get_rand_friends`` and ``get_rand_strangers``.
    """
    friend_data = get_friends_from_ml100k()
    raw_data = get_moive100k()
    friend_model = FriendsModel(raw_data, friend_data)
    cosine_sim = CosineSimilarity(friend_model)
    fs = Friends_Strangers(cosine_sim, f_num, t_num)
    friends = fs.get_rand_friends(user_id)
    strangers = fs.get_rand_strangers(user_id)
    # Both samples used to be computed and then dropped, which made the call
    # useless to its caller; hand them back instead.  Callers that ignored
    # the former ``None`` result are unaffected.
    return friends, strangers
def generate_simulating_data(raw_data, friend_data, user_id, f_num, t_num,
                             filelocation):
    """Write integer-scaled similarity and rating data for one user to disk.

    Random friends and strangers of ``user_id`` are drawn through
    ``Friends_Strangers`` and three files are written with
    ``save_list_to_file`` inside ``filelocation``:

    * ``similarity_<user_id>_<f_num>.dat``: the friends' similarities
      multiplied by 8 and converted to int32 (a single row).
    * ``friend_<user_id>_<f_num>.dat``: one dense user vector per friend,
      multiplied by 16 and converted to int32.
    * ``stranger_<user_id>_<t_num>.dat``: the same for every stranger.

    Args:
        raw_data: Rating data handed to ``FriendsModel``.
        friend_data: Friendship data handed to ``FriendsModel``; must contain
            ``user_id``.
        user_id: User whose friends and strangers are sampled.
        f_num: Passed to ``Friends_Strangers``; also part of the friend and
            similarity file names.
        t_num: Passed to ``Friends_Strangers``; also part of the stranger
            file name.
        filelocation: Output directory; created when it does not exist.

    Returns:
        None.  If ``user_id`` is not in ``friend_data`` a message is printed
        and nothing is written.
    """
    if user_id not in friend_data:
        print("invalid user_id!")
        return
    if not path.exists(filelocation):
        makedirs(filelocation)

    friend_model = FriendsModel(raw_data, friend_data)
    cosine_sim = CosineSimilarity(friend_model)
    fs = Friends_Strangers(cosine_sim, f_num, t_num)
    # Materialise the samples so they can be traversed more than once, and
    # split them with comprehensions so an empty sample yields empty output
    # instead of an unpacking error.
    friends = list(fs.get_rand_friends(user_id))
    strangers = list(fs.get_rand_strangers(user_id))
    f_ids = [f_id for f_id, _sim in friends]
    f_sims = [sim for _f_id, sim in friends]
    t_ids = [t_id for t_id, _sim in strangers]

    def scaled_ints(values, factor):
        # Integer scaling, for HE.
        return np.dot(values, factor).astype(np.int32).tolist()

    def dense_rows(user_ids):
        return [
            scaled_ints(friend_model.get_dense_user_vector(uid), 16)
            for uid in user_ids
        ]

    # ``filelocation`` is created as a directory above, so join the file
    # names onto it; plain concatenation only worked when the caller passed
    # a trailing separator.
    f1 = path.join(filelocation,
                   "similarity_{0}_{1}.dat".format(user_id, f_num))
    save_list_to_file(f1, [scaled_ints(f_sims, 8)])  # friends similarity

    f2 = path.join(filelocation, "friend_{0}_{1}.dat".format(user_id, f_num))
    save_list_to_file(f2, dense_rows(f_ids))  # friends rating data

    # Each stranger row must be built from that stranger's own id; looking up
    # the leftover friend id here wrote the last friend's vector once per
    # stranger.
    f3 = path.join(filelocation,
                   "stranger_{0}_{1}.dat".format(user_id, t_num))
    save_list_to_file(f3, dense_rows(t_ids))  # strangers rating data
def get_similarity_matrix(data_set, friend_data):
    """Build a ``CosineSimilarity`` on top of a fresh ``FriendsModel``.

    Args:
        data_set: Rating data handed to ``FriendsModel``.
        friend_data: Friendship data handed to ``FriendsModel``.

    Returns:
        The ``CosineSimilarity`` instance wrapping the new model.
    """
    return CosineSimilarity(FriendsModel(data_set, friend_data))