#print sum(v.values())
# NOTE(review): `uid` and `user_len` are not assigned anywhere in this chunk;
# this `if` looks like the tail of a per-user loop whose header lies above
# the visible code -- re-indent it under that loop if so.
if user_len > 0:
    #vec = [jvc_grams_count[idx].get(w, 0) for w in new_sw_list]
    # stopword without function words
    vec = [dict_user_count_ans[uid].get(w, 0) for w in selected_sw]
    # Scale the raw counts of the selected words by user_len.
    g_vec = [float(x) / user_len for x in vec]
    general_vec_ans[uid] = g_vec

## calculate similarity for each user pair
import pttfunc
import numpy as np

# Score every (i, j) index pair of user_list: general_vec of user i against
# general_vec_ans of user j.  Rows are gathered in a plain list and turned
# into an array once; growing the array with np.vstack for each pair
# re-copies all earlier rows on every iteration.
sim_rows = []
for i, user_i in enumerate(user_list):
    for j, user_j in enumerate(user_list):
        wj_sw = pttfunc.weighted_jaccard(general_vec[user_i],
                                         general_vec_ans[user_j])
        # Indices are stored as floats so each row fits a float64 array.
        sim_rows.append((float(i), float(j), wj_sw))

# reshape(-1, 3) keeps the array 2-D even when user_list is empty, so the
# column index used for sorting below stays valid.
sim_list = np.array(sim_rows, dtype='float64').reshape(-1, 3)

# sorting: ascending argsort on the similarity column, then reversed so the
# highest similarity comes first
sim_list = sim_list[sim_list[:, 2].argsort()]
sim_list = sim_list[::-1]

# make final list: map the stored indices back to user ids; the second user
# of each pair is tagged with the 'ANS' suffix
user_sim_list = []
for i, j, sim in sim_list:
    user_sim_list.append((user_list[int(i)], user_list[int(j)] + 'ANS', sim))

## evaluation
a = 0
for i, j, sim in user_sim_list:
    # NOTE(review): the body of this loop is cut off in this chunk; `pass`
    # only keeps the fragment parseable -- replace it with the real body.
    pass
#print sum(v.values())
# NOTE(review): `uid` and `user_len` are not assigned anywhere in this chunk;
# this `if` looks like the tail of a per-user loop whose header lies above
# the visible code -- re-indent it under that loop if so.
if user_len > 0:
    #vec = [jvc_grams_count[idx].get(w, 0) for w in new_sw_list]
    # stopword without function words
    vec = [dict_user_count_ans[uid].get(w, 0) for w in selected_sw]
    # Scale the raw counts of the selected words by user_len.
    g_vec = [float(x) / user_len for x in vec]
    general_vec_ans[uid] = g_vec

## calculate similarity for each user pair
import pttfunc
import numpy as np

# Score every (i, j) index pair of user_list: general_vec of user i against
# general_vec_ans of user j.  Rows are gathered in a plain list and turned
# into an array once; growing the array with np.vstack for each pair
# re-copies all earlier rows on every iteration.
sim_rows = []
for i, user_i in enumerate(user_list):
    for j, user_j in enumerate(user_list):
        wj_sw = pttfunc.weighted_jaccard(general_vec[user_i],
                                         general_vec_ans[user_j])
        # Indices are stored as floats so each row fits a float64 array.
        sim_rows.append((float(i), float(j), wj_sw))

# reshape(-1, 3) keeps the array 2-D even when user_list is empty, so the
# column index used for sorting below stays valid.
sim_list = np.array(sim_rows, dtype='float64').reshape(-1, 3)

# sorting: ascending argsort on the similarity column, then reversed so the
# highest similarity comes first
sim_list = sim_list[sim_list[:, 2].argsort()]
sim_list = sim_list[::-1]

# make final list: map the stored indices back to user ids; the second user
# of each pair is tagged with the 'ANS' suffix
user_sim_list = []
for i, j, sim in sim_list:
    user_sim_list.append((user_list[int(i)], user_list[int(j)] + 'ANS', sim))

## evaluation
# Fraction of vector slots left at 0.0 by vec_gen().
zero_percent = 0.9  #0.997056899935


def vec_gen():
    """Return a random sparse list of N_sw floats.

    Each slot stays 0.0 with probability ``zero_percent``; otherwise it is
    filled with ``random.uniform(0.0, 0.9)``.  Reads the module-level names
    ``N_sw`` and ``zero_percent``.
    """
    import random
    vec = [0.0] * N_sw
    for i in range(len(vec)):
        # random.random() lies in [0.0, 1.0), so this fires with probability
        # 1 - zero_percent (10% for the default 0.9).
        if random.random() >= zero_percent:
            vec[i] = random.uniform(0.0, 0.9)
    return vec


# Two independent sets of N_user random vectors to compare pairwise.
# N_sw, N_user, time, np and pttfunc are not defined in this block and must
# already be in scope when it runs.
vec1_list = [vec_gen() for _ in range(N_user)]
vec2_list = [vec_gen() for _ in range(N_user)]

# Time the full N_user x N_user weighted-Jaccard computation.
t_start = time.time()
size = N_user * N_user
# Every slot is overwritten below, so an uninitialised buffer is enough.
sim = np.empty(size, dtype='float64')
idx = 0
for i in range(N_user):
    for j in range(N_user):
        sim[idx] = pttfunc.weighted_jaccard(vec1_list[i], vec2_list[j])
        idx += 1
t_stop = time.time()
# Parenthesised single-argument form prints the same on Python 2 and 3.
print(t_stop - t_start)
# Length of each generated vector.
N_sw = 302
# Number of vectors generated per side of the comparison.
N_user = 203  #20342
# Fraction of vector slots left at 0.0 by vec_gen().
zero_percent = 0.9  #0.997056899935


def vec_gen():
    """Return a random sparse list of N_sw floats.

    Each slot stays 0.0 with probability ``zero_percent``; otherwise it is
    filled with ``random.uniform(0.0, 0.9)``.  Reads the module-level names
    ``N_sw`` and ``zero_percent``.
    """
    import random
    vec = [0.0] * N_sw
    for i in range(len(vec)):
        # random.random() lies in [0.0, 1.0), so this fires with probability
        # 1 - zero_percent (10% for the default 0.9).
        if random.random() >= zero_percent:
            vec[i] = random.uniform(0.0, 0.9)
    return vec


# Two independent sets of N_user random vectors to compare pairwise.
# time, np and pttfunc are not defined in this block and must already be in
# scope when it runs.
vec1_list = [vec_gen() for _ in range(N_user)]
vec2_list = [vec_gen() for _ in range(N_user)]

# Time the full N_user x N_user weighted-Jaccard computation.
t_start = time.time()
size = N_user * N_user
# Every slot is overwritten below, so an uninitialised buffer is enough.
sim = np.empty(size, dtype='float64')
idx = 0
for i in range(N_user):
    for j in range(N_user):
        sim[idx] = pttfunc.weighted_jaccard(vec1_list[i], vec2_list[j])
        idx += 1
t_stop = time.time()
# Parenthesised single-argument form prints the same on Python 2 and 3.
print(t_stop - t_start)