-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
80 lines (67 loc) · 2.06 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# -*- coding: utf-8 -*-
from collections import defaultdict
import math
import make_dic as md
from subnetwork import SubNetwork
from index import Indexer
from six.moves import cPickle
from sqltostc import all_tweets
import sqlconfig
# Module-level state shared by retrieve_replies() and idf() below.
net = SubNetwork()

# NOTE(security): unpickling can execute arbitrary code; the .pkl files read
# here must come from a trusted source.
print("load tweet pairs.....")
# Pickle data is a byte stream, so the file is opened in binary mode.
with open('tweet_dic.pkl', 'rb') as f:
    source_dic = cPickle.load(f)
net.set_source(source_dic)
print("Tweet Pairs loaded: len(pairs) -> " + str(len(source_dic)))

print("load index.....")
indexer = Indexer()
indexer.load("./index.pkl")
print("Index loaded")

print("word count / tweet dic.....")
with open('./word_count.pkl', 'rb') as f:
    wc_dic = cPickle.load(f)
print("dic loaded")
def retrieve_replies(input):
    """Rank candidate replies for a piece of input text.

    The nouns extracted from the text seed a sub-network; the mapping returned
    by ``net.page_rank()`` supplies weighted query words. Every word is looked
    up in the index and each hit is folded into a running score table through
    ``indexer.update_replies``, weighted by ``score * idf(hit_count)``. The
    table is finally normalized against ``wc_dic`` and sorted.

    Args:
        input: Text to find replies for (handed to ``md.noun_list``).

    Returns:
        list: ``(key, score)`` tuples ordered by descending score.
    """
    noun_list = md.noun_list(input)
    net.gen_sub_network(noun_list)
    queries = net.page_rank()

    results = defaultdict(int)
    for word in queries:
        score = queries[word]
        tuple_list = indexer.search(word)
        df = len(tuple_list)
        if df == 0:
            # No hits: nothing to accumulate, and idf(0) would divide by zero.
            continue
        # The weight depends only on the query word, so compute it once
        # instead of once per hit.
        weight = score * idf(df)
        for tup in tuple_list:
            results = indexer.update_replies(results, tup, weight)
    return tuples_from_dict(normalize(results, wc_dic))
def idf(df, total=None):
    """Return the smoothed inverse document frequency ``log10(N / df + 1)``.

    Args:
        df: Document frequency of a term (number of index hits).
        total: Collection size ``N``. When omitted, ``len(source_dic)`` is
            used, which is the original behavior.

    Returns:
        float: Base-10 logarithm of ``N / df + 1``.

    Raises:
        ZeroDivisionError: If ``df`` is zero.
    """
    n_docs = len(source_dic) if total is None else total
    # Force true division: on Python 2, int / int truncates the ratio.
    return math.log(float(n_docs) / df + 1, 10)
def tuples_from_dict(dic):
    """Return the items of ``dic`` as a list sorted by value, largest first.

    Args:
        dic: Mapping whose values are mutually comparable scores.

    Returns:
        list: ``(key, value)`` tuples in descending value order; items with
        equal values keep the order in which ``dic.items()`` yields them.
    """
    # Sort on the raw value. Scaling it by a positive constant cannot change
    # the order, but it can overflow large floats to inf and turn distinct
    # scores into ties.
    return sorted(dic.items(), key=lambda item: item[1], reverse=True)
def normalize(results, dic):
    """Divide every score in ``results`` by ``dic[key] + 1``, in place.

    Args:
        results: Mapping of key -> score; it is mutated.
        dic: Mapping consulted with the same keys to obtain the divisor base.

    Returns:
        The same ``results`` object, after rescaling.
    """
    for key, raw_score in results.items():
        # Only existing keys are reassigned, so the mapping's size is stable
        # while it is being iterated.
        results[key] = raw_score / (dic[key] + 1)
    return results
def test_data():
    """Load the tweets used as test inputs.

    Returns:
        Whatever ``all_tweets`` returns for the table named by
        ``sqlconfig.run_table_name`` (``main`` treats it as a mapping).
    """
    table_name = sqlconfig.run_table_name
    return all_tweets(table_name)
def main(max_inputs=2, top_k=10):
    """Write the best replies for the first few test tweets to ``replies.txt``.

    Each output line is the ``str()`` of a tuple made of the input's key
    followed by one result tuple from ``retrieve_replies``.

    Args:
        max_inputs: How many test inputs to process (default 2, as before).
        top_k: Maximum number of replies written per input (default 10).
    """
    inputs = test_data()
    # list(...) keeps the slice valid on Python 3, where keys() is a view.
    input_keys = list(inputs.keys())[:max_inputs]
    tweet_num = len(input_keys)
    # The context manager closes the file even if retrieval raises.
    with open('replies.txt', 'w') as out:
        for position, tweet_id in enumerate(input_keys, 1):
            print("STCINFO: " + str(position) + " of " + str(tweet_num)
                  + "@Twitter ID ->" + tweet_id)
            replies = [(tweet_id,) + tup
                       for tup in retrieve_replies(inputs[tweet_id])]
            # Slicing tolerates inputs with fewer than top_k replies, where
            # indexing a fixed range raised IndexError.
            for reply in replies[:top_k]:
                out.write(str(reply) + '\n')
# Run the batch job only when this file is executed as a script.
if __name__ == "__main__":
    main()