Esempio n. 1
0
    #print sum(v.values())
    if user_len > 0:
        #vec = [jvc_grams_count[idx].get(w, 0) for w in new_sw_list] #stopword without function words
        
        vec = [dict_user_count_ans[uid].get(w, 0) for w in selected_sw]
        g_vec = [float(x) / user_len for x in vec]
        general_vec_ans[uid] = g_vec

##calculate similarity for each user pair

import pttfunc
import numpy as np
# Score every (user, answer-user) pair. Rows are gathered in a plain list and
# converted to an array once: growing the array with np.vstack inside the loop
# re-copies all earlier rows on every iteration and needed a dummy first row.
sim_rows = []
for i, uid in enumerate(user_list):
    vec_i = general_vec[uid]
    for j, uid_ans in enumerate(user_list):
        wj_sw = pttfunc.weighted_jaccard(vec_i, general_vec_ans[uid_ans])
        # the indices are stored as floats so one float array holds all columns
        sim_rows.append((float(i), float(j), wj_sw))
# assumes weighted_jaccard returns a real scalar -- TODO confirm.
# reshape keeps the (n_pairs, 3) layout even when user_list is empty, so the
# column indexing below does not fail on empty input.
sim_list = np.array(sim_rows, dtype='float64').reshape(-1, 3)

#sorting: ascending argsort on the similarity column, then reversed so the
#highest similarity comes first
sim_list = sim_list[sim_list[:, 2].argsort()][::-1]

#make final list: (user, user + 'ANS', similarity), in sorted order
user_sim_list = [(user_list[int(i)], user_list[int(j)] + 'ANS', sim)
                 for i, j, sim in sim_list]
## evaluation
a = 0
for i,j,sim in user_sim_list:
Esempio n. 2
0
    #print sum(v.values())
    if user_len > 0:
        #vec = [jvc_grams_count[idx].get(w, 0) for w in new_sw_list] #stopword without function words

        vec = [dict_user_count_ans[uid].get(w, 0) for w in selected_sw]
        g_vec = [float(x) / user_len for x in vec]
        general_vec_ans[uid] = g_vec

##calculate similarity for each user pair

import pttfunc
import numpy as np
# Score every (user, answer-user) pair. Rows are gathered in a plain list and
# converted to an array once: growing the array with np.vstack inside the loop
# re-copies all earlier rows on every iteration and needed a dummy first row.
sim_rows = []
for i, uid in enumerate(user_list):
    vec_i = general_vec[uid]
    for j, uid_ans in enumerate(user_list):
        wj_sw = pttfunc.weighted_jaccard(vec_i, general_vec_ans[uid_ans])
        # the indices are stored as floats so one float array holds all columns
        sim_rows.append((float(i), float(j), wj_sw))
# assumes weighted_jaccard returns a real scalar -- TODO confirm.
# reshape keeps the (n_pairs, 3) layout even when user_list is empty, so the
# column indexing below does not fail on empty input.
sim_list = np.array(sim_rows, dtype='float64').reshape(-1, 3)

#sorting: ascending argsort on the similarity column, then reversed so the
#highest similarity comes first
sim_list = sim_list[sim_list[:, 2].argsort()][::-1]

#make final list: (user, user + 'ANS', similarity), in sorted order
user_sim_list = [(user_list[int(i)], user_list[int(j)] + 'ANS', sim)
                 for i, j, sim in sim_list]

## evaluation
Esempio n. 3
0
# Probability that vec_gen leaves an entry at 0.0 (sparsity of test vectors).
zero_percent = 0.9  #0.997056899935


def vec_gen(length=None, zero_prob=None):
    """Return a random sparse vector as a list of floats.

    Each entry is 0.0 with probability ``zero_prob``; otherwise it is drawn
    from ``random.uniform(0.0, 0.9)``.

    Args:
        length: number of entries; defaults to the module-level ``N_sw``.
        zero_prob: probability that an entry stays 0.0; defaults to the
            module-level ``zero_percent`` (previously a hard-coded 1-in-10
            chance of a non-zero entry, which ignored that setting).

    Returns:
        A list of ``length`` floats.
    """
    import random

    if length is None:
        length = N_sw
    if zero_prob is None:
        zero_prob = zero_percent
    # random.random() lies in [0.0, 1.0), so zero_prob=1.0 yields all zeros
    return [
        random.uniform(0.0, 0.9) if random.random() >= zero_prob else 0.0
        for _ in range(length)
    ]


# Two independent sets of random test vectors, N_user vectors each.
vec1_list = [vec_gen() for _ in range(N_user)]
vec2_list = [vec_gen() for _ in range(N_user)]

# Time the full N_user x N_user run of pttfunc.weighted_jaccard.
t_start = time.time()
size = N_user * N_user
# Uninitialised buffer: every slot is overwritten below, so there is no need
# to fill it with arange values and copy it first.
sim = np.empty(size, dtype='float64')
idx = 0  # flat position of the (vec1, vec2) pair, row-major
for vec1 in vec1_list:
    for vec2 in vec2_list:
        sim[idx] = pttfunc.weighted_jaccard(vec1, vec2)
        idx += 1
t_stop = time.time()

# Elapsed wall-clock seconds; the parenthesised form prints the same on
# Python 2 and Python 3.
print(t_stop - t_start)
Esempio n. 4
0
# Default number of entries in each generated vector.
N_sw = 302
# Number of vectors generated per side of the comparison.
N_user = 203 #20342
# Probability that vec_gen leaves an entry at 0.0 (sparsity of test vectors).
zero_percent = 0.9 #0.997056899935
def vec_gen(length=None, zero_prob=None):
    """Return a random sparse vector as a list of floats.

    Each entry is 0.0 with probability ``zero_prob``; otherwise it is drawn
    from ``random.uniform(0.0, 0.9)``.

    Args:
        length: number of entries; defaults to the module-level ``N_sw``.
        zero_prob: probability that an entry stays 0.0; defaults to the
            module-level ``zero_percent`` (previously a hard-coded 1-in-10
            chance of a non-zero entry, which ignored that setting).

    Returns:
        A list of ``length`` floats.
    """
    import random

    if length is None:
        length = N_sw
    if zero_prob is None:
        zero_prob = zero_percent
    # random.random() lies in [0.0, 1.0), so zero_prob=1.0 yields all zeros
    return [
        random.uniform(0.0, 0.9) if random.random() >= zero_prob else 0.0
        for _ in range(length)
    ]

# Two independent sets of random test vectors, N_user vectors each.
vec1_list = [vec_gen() for _ in range(N_user)]
vec2_list = [vec_gen() for _ in range(N_user)]

# Time the full N_user x N_user run of pttfunc.weighted_jaccard.
t_start = time.time()
size = N_user * N_user
# Uninitialised buffer: every slot is overwritten below, so there is no need
# to fill it with arange values and copy it first.
sim = np.empty(size, dtype='float64')
idx = 0  # flat position of the (vec1, vec2) pair, row-major
for vec1 in vec1_list:
    for vec2 in vec2_list:
        sim[idx] = pttfunc.weighted_jaccard(vec1, vec2)
        idx += 1
t_stop = time.time()

# Elapsed wall-clock seconds; the parenthesised form prints the same on
# Python 2 and Python 3.
print(t_stop - t_start)