# clusters6.py
__author__ = 'Nishant'
from lru import LRU
import numpy as np
class topic4:
    """Running term-count profile of one topic cluster.

    Three bounded ``LRU`` maps hold occurrence counts keyed by term:
    ``l1`` for hashtags, ``l2`` for users and ``l3`` for words.
    ``topic_count`` starts at 1 and is incremented by every
    ``set_cluster`` call.
    """

    # A matched hashtag only counts towards the bonus term of
    # ``get_similarity`` when its position in ``l1.keys()`` is below this.
    TOP_RANK_CUTOFF = 250

    def __init__(self, c_hash, c_user, c_words):
        """Create empty maps with the given capacities.

        c_hash, c_user, c_words -- sizes passed to ``LRU`` for the
        hashtag, user and word maps respectively.
        """
        self.topic_count = 1
        self.l1 = LRU(c_hash)
        self.l2 = LRU(c_user)
        self.l3 = LRU(c_words)

    def set_hashLRU(self, l):
        """Add one occurrence to the hashtag map for every item of ``l``."""
        self.set(self.l1, l)

    def set_userLRU(self, l):
        """Add one occurrence to the user map for every item of ``l``."""
        self.set(self.l2, l)

    def set_wordLRU(self, l):
        """Add one occurrence to the word map for every item of ``l``."""
        self.set(self.l3, l)

    def set(self, lru, l):
        """Increment ``lru[k]`` by one for each ``k`` in ``l``.

        Keys not yet present start from 0; a key repeated in ``l`` is
        counted once per repetition.
        """
        for k in l:
            lru[k] = lru.get(k, 0) + 1

    def set_cluster(self, hashtags, users, words):
        """Fold one item's hashtags, users and words into the cluster.

        Updates all three maps through ``set`` and then bumps
        ``topic_count`` by one.
        """
        self.set(self.l1, hashtags)
        self.set(self.l2, users)
        self.set(self.l3, words)
        self.topic_count += 1

    def _channel_score(self, store, terms):
        """Score ``terms`` against one map; return ``(score, top_hits)``.

        ``score`` is ``rank_sum / (capacity + 1) * count_sum / total``:
          * ``rank_sum`` adds ``capacity - position`` for every matched
            term, ``position`` being its index in ``store.keys()``;
          * ``count_sum`` is 1 plus the stored counts of the terms;
          * ``total`` is 1 plus the sum of every stored count.
        ``top_hits`` counts matched terms whose position is below
        ``TOP_RANK_CUTOFF``.
        """
        capacity = store.get_size()
        # Snapshot the ordering BEFORE any get(): lru-dict's get() is
        # understood to mark the key as most recently used, which would
        # otherwise make every matched key report position 0
        # (NOTE(review): confirm against the installed lru version).
        # One dict also replaces a list scan per term.
        position_of = dict(
            (key, pos) for pos, key in enumerate(store.keys())
        )

        count_sum = 1
        rank_sum = 0
        top_hits = 0
        for term in terms:
            # get() is kept for the count lookup, so whatever recency
            # refresh it performs on matched keys still happens.
            count_sum += store.get(term, 0)
            pos = position_of.get(term)
            if pos is None:
                continue
            rank_sum += capacity - pos
            if pos < self.TOP_RANK_CUTOFF:
                top_hits += 1

        total = sum(store.values()) + 1
        # float() keeps these true fractions even where ``/`` on ints
        # would floor (Python 2 without ``from __future__ import division``).
        score = (rank_sum / float(capacity + 1)) * (count_sum / float(total))
        return score, top_hits

    def get_similarity(self, hashtags, users, words):
        """Return how well the given terms match this cluster.

        The result is the sum of the hashtag, user and word channel
        scores (see ``_channel_score``) plus a bonus of
        ``top_hits - 1`` when at least one hashtag matched inside the
        first ``TOP_RANK_CUTOFF`` positions of the hashtag map.
        Returns 0.0 when nothing matches.
        """
        hash_score, top_hits = self._channel_score(self.l1, hashtags)
        user_score, _ = self._channel_score(self.l2, users)
        word_score, _ = self._channel_score(self.l3, words)

        bonus = top_hits - 1 if top_hits != 0 else 0
        return hash_score + user_score + word_score + bonus